-
Notifications
You must be signed in to change notification settings - Fork 3.3k
feat(intelligence): add Duplicate Entity Detection #2953
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
aashir-athar
wants to merge
5
commits into
tinyhumansai:main
from
aashir-athar:feat/entity-duplicates
Closed
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
62cbd99
feat(intelligence): add Duplicate Entity Detection
aashir-athar b438c66
fix(entity-duplicates): add missing namespace selector i18n keys
aashir-athar 366f09f
Merge remote-tracking branch 'upstream/main' into pr/2953
codex 06c970d
fix(entity-duplicates): translate tab labels
codex 1f5bcc0
Merge remote-tracking branch 'upstream/main' into pr/2953
senamakel File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
66 changes: 66 additions & 0 deletions
66
app/src/components/intelligence/EntityDuplicatesPanel.test.tsx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| import { fireEvent, render, screen } from '@testing-library/react'; | ||
| import { describe, expect, it, vi } from 'vitest'; | ||
|
|
||
| import { computeEntityDuplicates } from '../../lib/memory/entityDuplicates'; | ||
| import type { GraphRelation } from '../../utils/tauriCommands/memory'; | ||
| import EntityDuplicatesPanel from './EntityDuplicatesPanel'; | ||
|
|
||
| /** | ||
| * Test fixture: builds a GraphRelation from `subject` to `object` with a fixed | ||
| * namespace ('n') and predicate ('p'); every other field is a constant placeholder. | ||
| */ | ||
| function rel(subject: string, object: string): GraphRelation { | ||
| return { | ||
| namespace: 'n', | ||
| subject, | ||
| predicate: 'p', | ||
| object, | ||
| attrs: {}, | ||
| updatedAt: 0, | ||
| evidenceCount: 1, | ||
| orderIndex: null, | ||
| documentIds: [], | ||
| chunkIds: [], | ||
| }; | ||
| } | ||
|
|
||
| // Report built from three relations whose subjects differ only by letter case | ||
| // and surrounding whitespace ('Alice', 'alice', ' Alice '). | ||
| const dupReport = computeEntityDuplicates([ | ||
| rel('Alice', 'Bob'), | ||
| rel('alice', 'Carol'), | ||
| rel(' Alice ', 'Dave'), | ||
| ]); | ||
|
|
||
| // Render tests for each panel view: loading, empty, error + retry, all-clean, | ||
| // and duplicate clusters. Text assertions use English strings, presumably the | ||
| // default-locale values of the entityDuplicates.* i18n keys — confirm against | ||
| // the locale files. | ||
| describe('<EntityDuplicatesPanel />', () => { | ||
| it('renders the loading skeleton', () => { | ||
| render(<EntityDuplicatesPanel report={null} loading />); | ||
| expect(screen.getByTestId('entity-duplicates-loading')).toBeInTheDocument(); | ||
| }); | ||
|
|
||
| it('renders the empty state when there is no graph', () => { | ||
| render(<EntityDuplicatesPanel report={computeEntityDuplicates([])} />); | ||
| expect(screen.getByText('No knowledge graph yet.')).toBeInTheDocument(); | ||
| }); | ||
|
|
||
| it('renders an error with a working retry button', () => { | ||
| const onRetry = vi.fn(); | ||
| render(<EntityDuplicatesPanel report={null} error="graph unavailable" onRetry={onRetry} />); | ||
| expect(screen.getByRole('alert').textContent).toMatch(/graph unavailable/); | ||
| fireEvent.click(screen.getByRole('button', { name: 'Retry' })); | ||
| expect(onRetry).toHaveBeenCalledTimes(1); | ||
| }); | ||
|
|
||
| it('renders the all-clean message when entities exist but no duplicates', () => { | ||
| const clean = computeEntityDuplicates([rel('Alice', 'Bob')]); | ||
| render(<EntityDuplicatesPanel report={clean} />); | ||
| expect( | ||
| screen.getByText('No duplicate spellings detected — your entities look clean.') | ||
| ).toBeInTheDocument(); | ||
| }); | ||
|
|
||
| it('renders duplicate clusters with their variants', () => { | ||
| render(<EntityDuplicatesPanel report={dupReport} />); | ||
| expect(screen.getByText('Entities')).toBeInTheDocument(); | ||
| expect(screen.getByText('Duplicate sets')).toBeInTheDocument(); | ||
| expect(screen.getByText('Likely duplicate entities')).toBeInTheDocument(); | ||
| // All three spelling variants render. 'Alice' and ' Alice ' both normalize | ||
| // to the same visible text under Testing Library, so there are two of them. | ||
| expect(screen.getByText('alice')).toBeInTheDocument(); | ||
| expect(screen.getAllByText('Alice')).toHaveLength(2); | ||
| }); | ||
| }); |
172 changes: 172 additions & 0 deletions
172
app/src/components/intelligence/EntityDuplicatesPanel.tsx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,172 @@ | ||
| /** | ||
| * Duplicate Entity Detection — presentational view. Pure: renders the duplicate | ||
| * clusters (variant chips + degree) and summary tiles. No data fetching, no | ||
| * clock, no RNG. | ||
| */ | ||
| import { useT } from '../../lib/i18n/I18nContext'; | ||
| import type { DuplicateReport } from '../../lib/memory/entityDuplicates'; | ||
|
|
||
| // Upper bound on the number of clusters rendered in the list; when the report | ||
| // holds more, the list is cut at this size and a truncation note is shown. | ||
| const MAX_CLUSTERS = 50; | ||
|
|
||
| /** Props for the presentational duplicate-entity panel. */ | ||
| interface EntityDuplicatesPanelProps { | ||
| /** Report to render; null, or a report with entityCount 0, shows the empty state. */ | ||
| report: DuplicateReport | null; | ||
| /** When truthy, the loading skeleton is shown regardless of the other props. */ | ||
| loading?: boolean; | ||
| /** Error message; when truthy (and not loading) it is shown instead of the report. */ | ||
| error?: string | null; | ||
| /** If supplied, a retry button that calls it is rendered in the error view. */ | ||
| onRetry?: () => void; | ||
| } | ||
|
|
||
| /** | ||
| * Renders one of four views, checked in this order: loading skeleton, error | ||
| * (with an optional retry button), empty state (no report or zero entities), | ||
| * or the summary tiles followed by either the all-clean message or the list | ||
| * of duplicate clusters. Every view starts with the same intro note. | ||
| */ | ||
| const EntityDuplicatesPanel = ({ report, loading, error, onRetry }: EntityDuplicatesPanelProps) => { | ||
| const { t } = useT(); | ||
|
|
||
| // Intro note (title + description) shared by every view below. | ||
| const intro = ( | ||
| <div | ||
| role="note" | ||
| className="rounded-lg border border-primary-200 dark:border-primary-500/30 bg-primary-50 dark:bg-primary-500/10 px-3 py-2 text-xs text-stone-700 dark:text-neutral-200"> | ||
| <p className="font-medium mb-1">{t('entityDuplicates.title')}</p> | ||
| <p>{t('entityDuplicates.intro')}</p> | ||
| </div> | ||
| ); | ||
|
|
||
| // Loading takes precedence over both error and report. | ||
| if (loading) { | ||
| return ( | ||
| <div className="space-y-4"> | ||
| {intro} | ||
| <div | ||
| className="space-y-3" | ||
| role="status" | ||
| aria-label={t('entityDuplicates.loading')} | ||
| data-testid="entity-duplicates-loading"> | ||
| <div className="grid gap-2 sm:grid-cols-3"> | ||
| {[0, 1, 2].map(i => ( | ||
| <div | ||
| key={i} | ||
| className="animate-pulse rounded-lg border border-stone-200 dark:border-neutral-800 bg-stone-50 dark:bg-neutral-800/60 h-16" | ||
| /> | ||
| ))} | ||
| </div> | ||
| {[0, 1, 2].map(i => ( | ||
| <div | ||
| key={i} | ||
| className="animate-pulse rounded-lg border border-stone-200 dark:border-neutral-800 bg-stone-50 dark:bg-neutral-800/60 h-12" | ||
| /> | ||
| ))} | ||
| </div> | ||
| </div> | ||
| ); | ||
| } | ||
|
|
||
| // Any truthy error message replaces the report; the retry button is rendered | ||
| // only when onRetry is supplied. | ||
| if (error) { | ||
| return ( | ||
| <div className="space-y-4"> | ||
| {intro} | ||
| <div className="rounded-lg border border-coral-200 dark:border-coral-500/30 p-4 text-center"> | ||
| <p role="alert" className="text-xs text-coral-700 dark:text-coral-300"> | ||
| {t('entityDuplicates.errorPrefix')} {error} | ||
| </p> | ||
| {onRetry && ( | ||
| <button | ||
| type="button" | ||
| onClick={onRetry} | ||
| className="mt-2 rounded-lg bg-primary-500 px-3 py-1.5 text-xs font-semibold text-white hover:bg-primary-600"> | ||
| {t('entityDuplicates.retry')} | ||
| </button> | ||
| )} | ||
| </div> | ||
| </div> | ||
| ); | ||
| } | ||
|
|
||
| // No report, or a report with zero entities: show the empty state. | ||
| if (!report || report.entityCount === 0) { | ||
| return ( | ||
| <div className="space-y-4"> | ||
| {intro} | ||
| <div className="py-8 text-center"> | ||
| <h3 className="text-sm font-semibold text-stone-700 dark:text-neutral-200"> | ||
| {t('entityDuplicates.empty')} | ||
| </h3> | ||
| <p className="mt-1 text-xs text-stone-500 dark:text-neutral-400"> | ||
| {t('entityDuplicates.emptyHint')} | ||
| </p> | ||
| </div> | ||
| </div> | ||
| ); | ||
| } | ||
|
|
||
| // Render at most MAX_CLUSTERS clusters; `truncated` controls the "shown of | ||
| // total" note under the list. | ||
| // NOTE(review): truncation is decided from report.clusters.length, but the | ||
| // note prints report.clusterCount as the total — presumably the two are | ||
| // always equal; confirm against computeEntityDuplicates. | ||
| const clusters = report.clusters.slice(0, MAX_CLUSTERS); | ||
| const truncated = report.clusters.length > MAX_CLUSTERS; | ||
|
|
||
| return ( | ||
| <div className="space-y-4"> | ||
| {intro} | ||
|
|
||
| {/* Summary tiles */} | ||
| <div className="grid gap-2 sm:grid-cols-3"> | ||
| {[ | ||
| { label: t('entityDuplicates.metricEntities'), value: report.entityCount }, | ||
| { label: t('entityDuplicates.metricClusters'), value: report.clusterCount }, | ||
| { label: t('entityDuplicates.metricAffected'), value: report.affectedEntities }, | ||
| ].map(tile => ( | ||
| <div | ||
| key={tile.label} | ||
| className="rounded-lg border border-stone-200 dark:border-neutral-800 p-3"> | ||
| <div className="text-[10px] uppercase tracking-wider text-stone-400 dark:text-neutral-500"> | ||
| {tile.label} | ||
| </div> | ||
| <div className="text-lg font-semibold tabular-nums text-stone-900 dark:text-neutral-100"> | ||
| {tile.value} | ||
| </div> | ||
| </div> | ||
| ))} | ||
| </div> | ||
|
|
||
| {report.clusterCount === 0 ? ( | ||
| <p className="py-4 text-center text-sm text-sage-700 dark:text-sage-300"> | ||
| {t('entityDuplicates.allClean')} | ||
| </p> | ||
| ) : ( | ||
| <section aria-labelledby="entity-duplicates-heading" className="space-y-1.5"> | ||
| <h3 | ||
| id="entity-duplicates-heading" | ||
| className="text-xs font-semibold uppercase tracking-wider text-stone-500 dark:text-neutral-400"> | ||
| {t('entityDuplicates.heading')} | ||
| </h3> | ||
| <ul className="space-y-1.5"> | ||
| {clusters.map(cluster => ( | ||
| <li | ||
| key={cluster.normalized} | ||
| className="rounded-lg border border-stone-200 dark:border-neutral-800 px-3 py-2"> | ||
| <div className="flex flex-wrap items-center gap-1.5"> | ||
| {cluster.variants.map(variant => ( | ||
| <span | ||
| key={variant.id} | ||
| title={t('entityDuplicates.variantTitle').replace( | ||
| '{degree}', | ||
| String(variant.degree) | ||
| )} | ||
| className="inline-flex items-center gap-1 rounded-md border border-stone-200 dark:border-neutral-700 px-1.5 py-0.5 text-[11px] text-stone-800 dark:text-neutral-100"> | ||
| <span className="break-words"> | ||
| {variant.id || t('entityDuplicates.blankEntity')} | ||
| </span> | ||
| <span className="tabular-nums text-stone-400 dark:text-neutral-500"> | ||
| {variant.degree} | ||
| </span> | ||
| </span> | ||
| ))} | ||
| </div> | ||
| </li> | ||
| ))} | ||
| </ul> | ||
| {truncated && ( | ||
| <p className="text-center text-xs text-stone-400 dark:text-neutral-500"> | ||
| {t('entityDuplicates.truncated') | ||
| .replace('{shown}', String(clusters.length)) | ||
| .replace('{total}', String(report.clusterCount))} | ||
| </p> | ||
| )} | ||
| </section> | ||
| )} | ||
| </div> | ||
| ); | ||
| }; | ||
|
|
||
| export default EntityDuplicatesPanel; | ||
61 changes: 61 additions & 0 deletions
61
app/src/components/intelligence/EntityDuplicatesTab.test.tsx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| import { fireEvent, render, screen, waitFor } from '@testing-library/react'; | ||
| import { beforeEach, describe, expect, it, vi } from 'vitest'; | ||
|
|
||
| import { computeEntityDuplicates } from '../../lib/memory/entityDuplicates'; | ||
| import type { GraphRelation } from '../../utils/tauriCommands/memory'; | ||
| import EntityDuplicatesTab from './EntityDuplicatesTab'; | ||
|
|
||
| // Stand-ins for the two API functions; each test programs their results. | ||
| const mockLoad = vi.fn(); | ||
| const mockLoadNamespaces = vi.fn(); | ||
|
|
||
| // Replace the API module with wrappers that forward every call to the mocks | ||
| // above. The wrappers read the mocks at call time rather than capturing them, | ||
| // presumably because vi.mock is hoisted above these declarations — confirm. | ||
| vi.mock('../../services/api/entityDuplicatesApi', () => ({ | ||
| loadEntityDuplicates: (...args: unknown[]) => mockLoad(...args), | ||
| loadNamespaces: (...args: unknown[]) => mockLoadNamespaces(...args), | ||
| })); | ||
|
|
||
| /** | ||
| * Test fixture: builds a GraphRelation from `subject` to `object` with a fixed | ||
| * namespace ('n') and predicate ('p'); every other field is a constant placeholder. | ||
| */ | ||
| function rel(subject: string, object: string): GraphRelation { | ||
| return { | ||
| namespace: 'n', | ||
| subject, | ||
| predicate: 'p', | ||
| object, | ||
| attrs: {}, | ||
| updatedAt: 0, | ||
| evidenceCount: 1, | ||
| orderIndex: null, | ||
| documentIds: [], | ||
| chunkIds: [], | ||
| }; | ||
| } | ||
|
|
||
| // Report returned by the mocked loader by default; its two subjects ('Alice', | ||
| // 'alice') differ only by letter case. | ||
| const report = computeEntityDuplicates([rel('Alice', 'Bob'), rel('alice', 'Carol')]); | ||
|
|
||
| // Container tests: initial load on mount, re-query when the namespace | ||
| // selector changes, and error surfacing when the load rejects. | ||
| describe('<EntityDuplicatesTab />', () => { | ||
| beforeEach(() => { | ||
| // Default every test to a successful load and an empty namespace list | ||
| // (with no namespaces the selector is not rendered). | ||
| mockLoad.mockReset(); | ||
| mockLoadNamespaces.mockReset(); | ||
| mockLoad.mockResolvedValue(report); | ||
| mockLoadNamespaces.mockResolvedValue([]); | ||
| }); | ||
|
|
||
| it('loads on mount and renders the clusters', async () => { | ||
| render(<EntityDuplicatesTab />); | ||
| expect(mockLoad).toHaveBeenCalledWith(undefined); | ||
| await waitFor(() => expect(screen.getByText('Likely duplicate entities')).toBeInTheDocument()); | ||
| }); | ||
|
|
||
| it('shows the namespace selector and re-queries on change', async () => { | ||
| mockLoadNamespaces.mockResolvedValueOnce(['work', 'personal']); | ||
| render(<EntityDuplicatesTab />); | ||
| await waitFor(() => screen.getByRole('combobox')); | ||
| fireEvent.change(screen.getByRole('combobox'), { target: { value: 'work' } }); | ||
| await waitFor(() => expect(mockLoad).toHaveBeenCalledWith('work')); | ||
| }); | ||
|
|
||
| it('surfaces an error when the load fails', async () => { | ||
| mockLoad.mockReset(); | ||
| mockLoad.mockRejectedValueOnce(new Error('graph unavailable')); | ||
| render(<EntityDuplicatesTab />); | ||
| await waitFor(() => expect(screen.getByRole('alert').textContent).toMatch(/graph unavailable/)); | ||
| }); | ||
| }); |
79 changes: 79 additions & 0 deletions
79
app/src/components/intelligence/EntityDuplicatesTab.tsx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| /** | ||
| * Duplicate Entity Detection tab (container). Load-on-mount + namespace | ||
| * selector; delegates rendering to the pure <EntityDuplicatesPanel>. Read-only. | ||
| */ | ||
| import { useCallback, useEffect, useRef, useState } from 'react'; | ||
|
|
||
| import { useT } from '../../lib/i18n/I18nContext'; | ||
| import type { DuplicateReport } from '../../lib/memory/entityDuplicates'; | ||
| import { loadEntityDuplicates, loadNamespaces } from '../../services/api/entityDuplicatesApi'; | ||
| import EntityDuplicatesPanel from './EntityDuplicatesPanel'; | ||
|
|
||
| /** | ||
| * Container component: owns the report/loading/error state and the namespace | ||
| * selection, and passes them to <EntityDuplicatesPanel>. The selector is only | ||
| * rendered when at least one namespace was loaded. | ||
| */ | ||
| const EntityDuplicatesTab = () => { | ||
| const { t } = useT(); | ||
| const [report, setReport] = useState<DuplicateReport | null>(null); | ||
| const [loading, setLoading] = useState(true); | ||
| const [error, setError] = useState<string | null>(null); | ||
| const [namespaces, setNamespaces] = useState<string[]>([]); | ||
| const [namespace, setNamespace] = useState(''); | ||
| // Monotonic token: ignore a response if a newer load has since started. | ||
| const latestRequestId = useRef(0); | ||
|
|
||
| // Fetches the report for `ns`; an empty string is passed to the API as | ||
| // undefined. Only the most recently started request may update the report, | ||
| // the error, or the loading flag. | ||
| const load = useCallback(async (ns: string) => { | ||
| const requestId = (latestRequestId.current += 1); | ||
| setLoading(true); | ||
| setError(null); | ||
| try { | ||
| const next = await loadEntityDuplicates(ns || undefined); | ||
| if (requestId !== latestRequestId.current) return; | ||
| setReport(next); | ||
| } catch (err) { | ||
| if (requestId !== latestRequestId.current) return; | ||
| setError(err instanceof Error ? err.message : String(err)); | ||
| } finally { | ||
| if (requestId === latestRequestId.current) setLoading(false); | ||
| } | ||
| }, []); | ||
|
|
||
| // On mount: fetch the namespace list (a failure falls back to an empty list, | ||
| // which keeps the selector hidden) and start the initial load with no | ||
| // namespace. | ||
| useEffect(() => { | ||
| loadNamespaces() | ||
| .then(setNamespaces) | ||
| .catch(() => setNamespaces([])); | ||
| void load(''); | ||
| }, [load]); | ||
|
|
||
| // Records the selected namespace and immediately reloads the report for it. | ||
| const handleNamespace = (next: string): void => { | ||
| setNamespace(next); | ||
| void load(next); | ||
| }; | ||
|
|
||
| return ( | ||
| <div className="space-y-4"> | ||
| {namespaces.length > 0 && ( | ||
| <label className="flex items-center gap-2 text-xs text-stone-600 dark:text-neutral-300"> | ||
| {t('entityDuplicates.namespaceLabel')} | ||
| <select | ||
| value={namespace} | ||
| onChange={e => handleNamespace(e.target.value)} | ||
| className="rounded-lg border border-stone-200 dark:border-neutral-700 bg-white dark:bg-neutral-900 px-2 py-1 text-sm text-stone-800 dark:text-neutral-100"> | ||
| <option value="">{t('entityDuplicates.namespaceAll')}</option> | ||
| {namespaces.map(ns => ( | ||
| <option key={ns} value={ns}> | ||
| {ns} | ||
| </option> | ||
| ))} | ||
| </select> | ||
| </label> | ||
| )} | ||
|
|
||
| <EntityDuplicatesPanel | ||
| report={report} | ||
| loading={loading} | ||
| error={error} | ||
| onRetry={() => void load(namespace)} | ||
| /> | ||
| </div> | ||
| ); | ||
| }; | ||
|
|
||
| export default EntityDuplicatesTab; |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.