Skip to content
This repository was archived by the owner on Jun 19, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions docs/deduplication.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!-- PATCH: This fix was AI-generated; review it manually before merging. -->
# Transaction Deduplication

## Overview
FinMind now supports intelligent transaction deduplication during imports and syncs. The system uses fuzzy string matching based on configurable fields (e.g., date, amount, description) to detect potentially duplicate transactions, reducing manual cleanup.

## Configuration
Deduplication settings can be controlled via environment variables:

- `DEDUPLICATION_THRESHOLD` (default: 0.9) – similarity score (0–1) at or above which a transaction is considered a duplicate.
- `DEDUPLICATION_FIELDS` (default: `date,amount,description,category`) – comma-separated list of fields used to build the comparison fingerprint.

The logic is implemented in `src/utils/deduplication.ts` and is used by the transaction import and sync services.

## How It Works
1. Builds a normalized string fingerprint for each transaction from the configured fields.
2. Computes the similarity between the incoming transaction fingerprint and each existing one using the Dice coefficient over character bigrams (`compareTwoStrings` from the `string-similarity` package).
3. If the highest similarity meets or exceeds the threshold, the transaction is flagged as duplicate.

## Extending
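You can override the defaults per call by passing a config object as the third argument to `deduplicateTransactions`, e.g. `deduplicateTransactions(incoming, existing, { threshold: 0.8 })`. The same object also accepts a `fields` array.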
12 changes: 12 additions & 0 deletions src/config/deduplicationConfig.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// PATCH: This fix was AI-generated; review it manually before merging.
/** Settings that control fuzzy transaction deduplication. */
export interface DeduplicationConfig {
  /** Similarity score (0–1) a transaction must reach to count as a duplicate. */
  threshold: number;
  /** Names of the transaction fields used to build the comparison fingerprint. */
  fields: string[];
}

const DEFAULT_THRESHOLD = 0.9;
const DEFAULT_FIELDS = 'date,amount,description,category';

/**
 * Parses the similarity threshold from raw environment-variable text.
 *
 * Falls back to the default when the value is missing, blank, not numeric,
 * or outside [0, 1]. Without this, a typo would produce NaN (nothing is ever
 * flagged) and a negative value would flag every transaction.
 */
function parseThreshold(raw: string | undefined): number {
  const parsed = parseFloat(raw || '');
  const inRange = Number.isFinite(parsed) && parsed >= 0 && parsed <= 1;
  return inRange ? parsed : DEFAULT_THRESHOLD;
}

/**
 * Parses the comma-separated field list from raw environment-variable text.
 *
 * Names are trimmed and empty entries dropped, so `"date, amount"` resolves to
 * real field names. Falls back to the default list when nothing usable remains.
 */
function parseFields(raw: string | undefined): string[] {
  const names = (raw || DEFAULT_FIELDS)
    .split(',')
    .map(name => name.trim())
    .filter(name => name !== '');
  return names.length > 0 ? names : DEFAULT_FIELDS.split(',');
}

// Read once at module load from DEDUPLICATION_THRESHOLD / DEDUPLICATION_FIELDS.
const config: DeduplicationConfig = {
  threshold: parseThreshold(process.env.DEDUPLICATION_THRESHOLD),
  fields: parseFields(process.env.DEDUPLICATION_FIELDS),
};

export default config;
53 changes: 53 additions & 0 deletions src/services/transactionService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// PATCH: This fix was AI-generated; review it manually before merging.
import { deduplicateTransactions } from '../utils/deduplication';
import deduplicationConfig from '../config/deduplicationConfig';
import * as transactionRepo from '../repositories/transactionRepository';

/** Shape of a transaction as handled by the import and sync services. */
interface Transaction {
// Optional identifier; syncTransactions updates when it is set and creates otherwise.
id?: string;
// Transaction date as a string; the format is not enforced here.
date: string;
amount: number;
description: string;
category?: string;
accountId?: string;
}

/**
 * Imports a batch of transactions, skipping any that deduplication flags
 * as duplicates of already-stored transactions.
 *
 * @param incoming - Transactions to import.
 * @returns How many transactions were created and how many were flagged as duplicates.
 */
export async function importTransactions(incoming: Transaction[]): Promise<{
  imported: number;
  duplicates: number;
}> {
  const stored = await transactionRepo.getAll();
  const outcome = deduplicateTransactions(incoming, stored, deduplicationConfig);
  const fresh = outcome.unique;

  // Skip the repository call entirely when nothing survived deduplication.
  if (fresh.length !== 0) {
    await transactionRepo.bulkCreate(fresh);
  }

  return { imported: fresh.length, duplicates: outcome.duplicates.length };
}

/** Returns true when every field present on `incomingTx` strictly equals the stored record's field. */
function matchesStored(incomingTx: Transaction, stored: Transaction): boolean {
  return (Object.keys(incomingTx) as (keyof Transaction)[]).every(
    key => incomingTx[key] === stored[key]
  );
}

/**
 * Synchronizes a batch of transactions with the stored ones.
 *
 * Incoming records whose `id` matches a stored record are treated as updates to
 * that record and bypass fuzzy matching. Otherwise an edited record would be
 * compared against its own previous version, score above the threshold, and be
 * dropped as a "duplicate". An update identical to the stored record is still
 * counted as a duplicate and not written.
 * NOTE(review): this assumes `id` identifies the same record on both sides — confirm.
 *
 * All other records are deduplicated against the stored set. Survivors are
 * updated when they carry an `id` and created otherwise.
 *
 * @param incoming - Transactions to synchronize.
 * @returns How many transactions were written and how many were skipped as duplicates.
 */
export async function syncTransactions(incoming: Transaction[]): Promise<{
  synced: number;
  duplicates: number;
}> {
  const existing = await transactionRepo.getAll();

  const storedById = new Map<string, Transaction>();
  for (const stored of existing) {
    if (stored.id) {
      storedById.set(stored.id, stored);
    }
  }

  const changed: Transaction[] = [];
  const candidates: Transaction[] = [];
  let unchanged = 0;
  for (const tx of incoming) {
    const stored = tx.id ? storedById.get(tx.id) : undefined;
    if (stored === undefined) {
      candidates.push(tx);
    } else if (matchesStored(tx, stored)) {
      unchanged += 1;
    } else {
      changed.push(tx);
    }
  }

  const { unique, duplicates } = deduplicateTransactions(
    candidates,
    existing,
    deduplicationConfig
  );

  // Writes stay sequential, as before; updates to known records go first.
  const toWrite = [...changed, ...unique];
  for (const tx of toWrite) {
    if (tx.id) {
      await transactionRepo.update(tx.id, tx);
    } else {
      await transactionRepo.create(tx);
    }
  }

  return { synced: toWrite.length, duplicates: duplicates.length + unchanged };
}
62 changes: 62 additions & 0 deletions src/utils/deduplication.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// PATCH: This fix was AI-generated; review it manually before merging.
import stringSimilarity from 'string-similarity';

/** Shape of a transaction as seen by the deduplication helpers. */
interface Transaction {
id?: string;
// Transaction date as a string; the format is not enforced here.
date: string;
amount: number;
description: string;
category?: string;
accountId?: string;
}

/** Options accepted by deduplicateTransactions. */
interface DeduplicationConfig {
// Similarity score a candidate must meet or exceed to be flagged as a duplicate.
threshold: number;
// Names of the transaction fields joined into the comparison fingerprint.
fields: string[];
}

/** Defaults applied by deduplicateTransactions for options the caller does not supply. */
const DEFAULT_CONFIG: DeduplicationConfig = {
threshold: 0.9,
fields: ['date', 'amount', 'description', 'category'],
};

/**
 * Builds the normalized comparison string for a transaction.
 *
 * Each requested field is stringified, trimmed and lower-cased, and the
 * results are joined with `|` in the order the fields were given.
 *
 * @param transaction - Transaction to fingerprint.
 * @param fields - Field names to include, in order.
 * @returns The `|`-joined fingerprint.
 */
function buildFingerprint(transaction: Transaction, fields: string[]): string {
  const segments: string[] = [];
  for (const name of fields) {
    const raw = transaction[name as keyof Transaction];
    // Missing values still occupy a slot so positions stay aligned across fingerprints.
    segments.push(raw == null ? '' : String(raw).trim().toLowerCase());
  }
  return segments.join('|');
}

/**
 * Splits incoming transactions into those that look new and those that look
 * like duplicates of an existing transaction.
 *
 * A candidate is a duplicate when the similarity between its fingerprint and
 * any existing fingerprint meets or exceeds the threshold. Candidates are only
 * compared against `existing`, never against each other.
 *
 * @param incoming - Transactions to classify.
 * @param existing - Transactions to compare against.
 * @param config - Optional overrides; omitted, `undefined` or NaN values fall
 *   back to the defaults.
 * @returns The candidates partitioned into `unique` and `duplicates`, each in
 *   input order.
 */
export function deduplicateTransactions(
  incoming: Transaction[],
  existing: Transaction[],
  config: Partial<DeduplicationConfig> = {}
): { unique: Transaction[]; duplicates: Transaction[] } {
  // Resolve each option individually: an object spread would let an explicit
  // `undefined` (allowed by Partial<>) overwrite the default.
  const threshold =
    typeof config.threshold === 'number' && !Number.isNaN(config.threshold)
      ? config.threshold
      : DEFAULT_CONFIG.threshold;
  const fields = config.fields !== undefined ? config.fields : DEFAULT_CONFIG.fields;

  // Existing fingerprints do not depend on the candidate, so build them once.
  const existingFingerprints = existing.map(tx => buildFingerprint(tx, fields));

  // A fingerprint whose fields are all empty is exactly this many separators.
  const separatorCount = Math.max(fields.length - 1, 0);

  const unique: Transaction[] = [];
  const duplicates: Transaction[] = [];

  for (const candidate of incoming) {
    const candidateFingerprint = buildFingerprint(candidate, fields);

    // With no field data (e.g. misconfigured field names) every fingerprint is
    // identical and would score 1, flagging every transaction as a duplicate.
    // Treat such candidates as unique instead of silently dropping them.
    const hasData = candidateFingerprint.length > separatorCount;

    // `some` stops at the first match and avoids Math.max(...array), which can
    // throw RangeError when the existing list is very large.
    const isDuplicate =
      hasData &&
      existingFingerprints.some(
        fingerprint =>
          stringSimilarity.compareTwoStrings(candidateFingerprint, fingerprint) >= threshold
      );

    if (isDuplicate) {
      duplicates.push(candidate);
    } else {
      unique.push(candidate);
    }
  }

  return { unique, duplicates };
}
54 changes: 54 additions & 0 deletions tests/utils/deduplication.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// PATCH: This fix was AI-generated; review it manually before merging.
import { deduplicateTransactions } from '../../src/utils/deduplication';

// Shared fixture: the already-stored transactions every case compares against.
const sampleExisting = [
  { date: '2023-01-01', amount: 29.99, description: 'Netflix subscription' },
  { date: '2023-01-02', amount: 50.00, description: 'Grocery store purchase' },
];

describe('deduplicateTransactions', () => {
  it('should flag exact duplicates', () => {
    const incoming = [
      { date: '2023-01-01', amount: 29.99, description: 'Netflix subscription' },
    ];
    const result = deduplicateTransactions(incoming, sampleExisting);
    expect(result.unique).toHaveLength(0);
    expect(result.duplicates).toHaveLength(1);
  });

  it('should flag similar transactions above threshold', () => {
    // Differs from the stored record only in case, spacing and a trailing period,
    // which gives a bigram Dice score of about 0.96. The earlier fixture
    // ('Netflix monthly sub', amount 30.00) scored about 0.60, below the 0.8
    // threshold, so it could never be flagged.
    const incoming = [
      { date: '2023-01-01', amount: 29.99, description: 'NETFLIX  Subscription.' },
    ];
    const result = deduplicateTransactions(incoming, sampleExisting, { threshold: 0.8 });
    expect(result.unique).toHaveLength(0);
    expect(result.duplicates).toHaveLength(1);
  });

  it('should keep distinct transactions', () => {
    const incoming = [
      { date: '2023-01-03', amount: 15.00, description: 'Coffee shop' },
    ];
    const result = deduplicateTransactions(incoming, sampleExisting);
    expect(result.unique).toHaveLength(1);
    expect(result.duplicates).toHaveLength(0);
  });

  it('should respect custom threshold', () => {
    // This candidate scores 0.8 against the stored grocery purchase.
    const incoming = [
      { date: '2023-01-02', amount: 55.00, description: 'Grocery store' },
    ];

    const strict = deduplicateTransactions(incoming, sampleExisting, { threshold: 0.95 });
    expect(strict.unique).toHaveLength(1);
    expect(strict.duplicates).toHaveLength(0);

    // The same input flips to duplicate under a looser threshold, which shows
    // that the option is actually applied.
    const loose = deduplicateTransactions(incoming, sampleExisting, { threshold: 0.75 });
    expect(loose.unique).toHaveLength(0);
    expect(loose.duplicates).toHaveLength(1);
  });

  it('should handle empty existing list', () => {
    const incoming = [
      { date: '2023-01-01', amount: 29.99, description: 'Netflix subscription' },
    ];
    const result = deduplicateTransactions(incoming, []);
    expect(result.unique).toHaveLength(1);
    expect(result.duplicates).toHaveLength(0);
  });
});