From d2cdd34f7380e42d29955464ec191e643d9f1fbe Mon Sep 17 00:00:00 2001
From: yoshiha-ji
Date: Fri, 29 May 2026 10:41:50 +0800
Subject: [PATCH] fix: apply bounty patch for issue #113

Automated fix generated by Bounty Sniffer + DeepSeek analysis.
Target: rohitdash08/FinMind#113
Files: src/utils/deduplication.ts, src/config/deduplicationConfig.ts, src/services/transactionService.ts, tests/utils/deduplication.test.ts, docs/deduplication.md
---
 docs/deduplication.md              | 21 ++++++++++
 src/config/deduplicationConfig.ts  | 12 ++++++
 src/services/transactionService.ts | 53 +++++++++++++++++++++++++
 src/utils/deduplication.ts         | 62 ++++++++++++++++++++++++++++++
 tests/utils/deduplication.test.ts  | 54 ++++++++++++++++++++++++++
 5 files changed, 202 insertions(+)
 create mode 100644 docs/deduplication.md
 create mode 100644 src/config/deduplicationConfig.ts
 create mode 100644 src/services/transactionService.ts
 create mode 100644 src/utils/deduplication.ts
 create mode 100644 tests/utils/deduplication.test.ts

diff --git a/docs/deduplication.md b/docs/deduplication.md
new file mode 100644
index 000000000..37fc2edc5
--- /dev/null
+++ b/docs/deduplication.md
@@ -0,0 +1,21 @@
+<!-- PATCH: AI-generated fix suggestion; review manually before merging. -->
+# Transaction Deduplication
+
+## Overview
+FinMind now supports intelligent transaction deduplication during imports and syncs. The system uses fuzzy string matching based on configurable fields (e.g., date, amount, description) to detect potentially duplicate transactions, reducing manual cleanup.
+
+## Configuration
+Deduplication settings can be controlled via environment variables:
+
+- `DEDUPLICATION_THRESHOLD` (default: 0.9) – similarity score (0–1) at or above which a transaction is considered a duplicate.
+- `DEDUPLICATION_FIELDS` (default: `date,amount,description,category`) – comma-separated list of fields used to build the comparison fingerprint.
+
+The logic is implemented in `src/utils/deduplication.ts` and is used by the transaction import and sync services.
+
+## How It Works
+1. Builds a normalized (trimmed, lower-cased) string fingerprint for each transaction from the configured fields.
+2. Computes the similarity between the incoming transaction's fingerprint and each existing one using the Dice coefficient (`compareTwoStrings` from the `string-similarity` package).
+3. If the highest similarity meets or exceeds the threshold, the transaction is flagged as a duplicate.
+
+## Extending
+You can override the default threshold per call by passing a `threshold` option as the third argument to the `deduplicateTransactions` function.
diff --git a/src/config/deduplicationConfig.ts b/src/config/deduplicationConfig.ts
new file mode 100644
index 000000000..064e1b1e4
--- /dev/null
+++ b/src/config/deduplicationConfig.ts
@@ -0,0 +1,43 @@
+// PATCH: AI-generated fix suggestion; review manually before merging.
+
+/** Settings that control fuzzy transaction deduplication. */
+export interface DeduplicationConfig {
+  /** Similarity score in [0, 1] at or above which a transaction is a duplicate. */
+  threshold: number;
+  /** Transaction field names joined, in order, into the comparison fingerprint. */
+  fields: string[];
+}
+
+const DEFAULT_THRESHOLD = 0.9;
+const DEFAULT_FIELDS: readonly string[] = ['date', 'amount', 'description', 'category'];
+
+/**
+ * Parses DEDUPLICATION_THRESHOLD, falling back to the default when the value is
+ * missing, not a number, or outside [0, 1]. Without this guard a typo yields NaN,
+ * and `similarity >= NaN` is always false, which silently disables deduplication.
+ */
+function parseThreshold(raw: string | undefined): number {
+  const parsed = Number.parseFloat(raw ?? '');
+  const isValid = Number.isFinite(parsed) && parsed >= 0 && parsed <= 1;
+  return isValid ? parsed : DEFAULT_THRESHOLD;
+}
+
+/**
+ * Parses DEDUPLICATION_FIELDS as a comma-separated list, trimming whitespace and
+ * dropping empty entries so that "date, amount" resolves to real field names.
+ * Falls back to the default list when nothing usable remains.
+ */
+function parseFields(raw: string | undefined): string[] {
+  const fields = (raw ?? '')
+    .split(',')
+    .map(field => field.trim())
+    .filter(field => field.length > 0);
+  return fields.length > 0 ? fields : [...DEFAULT_FIELDS];
+}
+
+const config: DeduplicationConfig = {
+  threshold: parseThreshold(process.env.DEDUPLICATION_THRESHOLD),
+  fields: parseFields(process.env.DEDUPLICATION_FIELDS),
+};
+
+export default config;
diff --git a/src/services/transactionService.ts b/src/services/transactionService.ts
new file mode 100644
index 000000000..d909880b9
--- /dev/null
+++ b/src/services/transactionService.ts
@@ -0,0 +1,88 @@
+// PATCH: AI-generated fix suggestion; review manually before merging.
+// NOTE(review): `../repositories/transactionRepository` is not among the files in
+// this patch; confirm it exists and exposes getAll, bulkCreate, create and update.
+import { deduplicateTransactions } from '../utils/deduplication';
+import type { Transaction } from '../utils/deduplication';
+import deduplicationConfig from '../config/deduplicationConfig';
+import * as transactionRepo from '../repositories/transactionRepository';
+
+/**
+ * Imports a batch of transactions, skipping those that look like duplicates of
+ * transactions already returned by the repository.
+ *
+ * @param incoming - Transactions to import.
+ * @returns How many transactions were created and how many were skipped as duplicates.
+ */
+export async function importTransactions(incoming: Transaction[]): Promise<{
+  imported: number;
+  duplicates: number;
+}> {
+  // An empty batch needs no comparison, so skip the repository read.
+  if (incoming.length === 0) {
+    return { imported: 0, duplicates: 0 };
+  }
+
+  const existing = await transactionRepo.getAll();
+  const { unique, duplicates } = deduplicateTransactions(
+    incoming,
+    existing,
+    deduplicationConfig
+  );
+
+  if (unique.length > 0) {
+    await transactionRepo.bulkCreate(unique);
+  }
+
+  return { imported: unique.length, duplicates: duplicates.length };
+}
+
+/**
+ * Synchronizes a batch of transactions: records whose id is already stored are
+ * updated, and the remaining records are written unless they look like
+ * duplicates of stored transactions.
+ *
+ * @param incoming - Transactions received from the sync source.
+ * @returns How many transactions were written and how many were skipped as duplicates.
+ */
+export async function syncTransactions(incoming: Transaction[]): Promise<{
+  synced: number;
+  duplicates: number;
+}> {
+  if (incoming.length === 0) {
+    return { synced: 0, duplicates: 0 };
+  }
+
+  const existing = await transactionRepo.getAll();
+  const storedIds = new Set<string>();
+  for (const stored of existing) {
+    if (stored.id) {
+      storedIds.add(stored.id);
+    }
+  }
+
+  // A record that is already stored under the same id is an update to that
+  // record, not a possible duplicate of it. Sending it through the fuzzy matcher
+  // would compare it with its own stored copy, score it at or near 1.0, and
+  // silently drop the update.
+  const candidates = incoming.filter(tx => !(tx.id && storedIds.has(tx.id)));
+  const { duplicates } = deduplicateTransactions(
+    candidates,
+    existing,
+    deduplicationConfig
+  );
+
+  // Filter the original batch so writes keep the caller's ordering.
+  const skipped = new Set(duplicates);
+  const toSync = incoming.filter(tx => !skipped.has(tx));
+
+  // Writes run one at a time, in input order.
+  for (const tx of toSync) {
+    if (tx.id) {
+      await transactionRepo.update(tx.id, tx);
+    } else {
+      await transactionRepo.create(tx);
+    }
+  }
+
+  return { synced: toSync.length, duplicates: duplicates.length };
+}
diff --git a/src/utils/deduplication.ts b/src/utils/deduplication.ts
new file mode 100644
index 000000000..ebab24fc7
--- /dev/null
+++ b/src/utils/deduplication.ts
@@ -0,0 +1,102 @@
+// PATCH: AI-generated fix suggestion; review manually before merging.
+// NOTE(review): this patch does not touch package.json; confirm that
+// `string-similarity` is already a declared dependency.
+import stringSimilarity from 'string-similarity';
+
+/** Minimal transaction shape needed for duplicate detection. */
+export interface Transaction {
+  id?: string;
+  date: string;
+  amount: number;
+  description: string;
+  category?: string;
+  accountId?: string;
+}
+
+interface DeduplicationConfig {
+  /** Similarity score at or above which a candidate is treated as a duplicate. */
+  threshold: number;
+  /** Transaction field names joined, in order, into the fingerprint. */
+  fields: string[];
+}
+
+const DEFAULT_CONFIG: DeduplicationConfig = {
+  threshold: 0.9,
+  fields: ['date', 'amount', 'description', 'category'],
+};
+
+/**
+ * Merges caller overrides with the defaults.
+ *
+ * An object spread is not enough here: an explicit `threshold: undefined` (or
+ * NaN) would make every `>=` comparison false, and an empty `fields` list would
+ * give every transaction the same empty fingerprint, so every candidate would
+ * be reported as a duplicate.
+ */
+function resolveConfig(overrides: Partial<DeduplicationConfig>): DeduplicationConfig {
+  const { threshold, fields } = overrides;
+  return {
+    threshold:
+      threshold === undefined || Number.isNaN(threshold) ? DEFAULT_CONFIG.threshold : threshold,
+    fields: fields === undefined || fields.length === 0 ? DEFAULT_CONFIG.fields : fields,
+  };
+}
+
+/**
+ * Builds the normalized comparison string for one transaction.
+ *
+ * Each configured field is stringified, trimmed and lower-cased; missing values
+ * become empty strings. The parts are joined with `|`.
+ */
+function buildFingerprint(transaction: Transaction, fields: string[]): string {
+  return fields
+    .map(field => {
+      const value = transaction[field as keyof Transaction];
+      return value == null ? '' : String(value).trim().toLowerCase();
+    })
+    .join('|');
+}
+
+/**
+ * Splits `incoming` into transactions that look new and transactions that look
+ * like duplicates of something in `existing`.
+ *
+ * A candidate is a duplicate when the similarity between its fingerprint and the
+ * fingerprint of at least one existing transaction is at or above the threshold.
+ * Candidates are compared only with `existing`, never with each other, so
+ * repeats inside one batch are not detected.
+ *
+ * @param incoming - Transactions to classify.
+ * @param existing - Transactions already stored.
+ * @param config - Optional overrides for the default threshold and fields.
+ * @returns The candidates split into `unique` and `duplicates`, each in input order.
+ */
+export function deduplicateTransactions(
+  incoming: Transaction[],
+  existing: Transaction[],
+  config: Partial<DeduplicationConfig> = {}
+): { unique: Transaction[]; duplicates: Transaction[] } {
+  const { threshold, fields } = resolveConfig(config);
+
+  // Fingerprints of stored transactions do not depend on the candidate, so build
+  // them once instead of once per incoming transaction.
+  const existingFingerprints = existing.map(tx => buildFingerprint(tx, fields));
+
+  const unique: Transaction[] = [];
+  const duplicates: Transaction[] = [];
+
+  for (const candidate of incoming) {
+    const candidateFingerprint = buildFingerprint(candidate, fields);
+    // `some` stops at the first match and avoids `Math.max(...scores)`, which
+    // passes one argument per stored transaction and can exceed the engine's
+    // argument limit on large histories. With an empty `existing` it returns
+    // false, so every candidate is unique.
+    const isDuplicate = existingFingerprints.some(
+      fingerprint =>
+        stringSimilarity.compareTwoStrings(candidateFingerprint, fingerprint) >= threshold
+    );
+    (isDuplicate ? duplicates : unique).push(candidate);
+  }
+
+  return { unique, duplicates };
+}
diff --git a/tests/utils/deduplication.test.ts b/tests/utils/deduplication.test.ts
new file mode 100644
index 000000000..d53635d99
--- /dev/null
+++ b/tests/utils/deduplication.test.ts
@@ -0,0 +1,83 @@
+// PATCH: AI-generated fix suggestion; review manually before merging.
+import { deduplicateTransactions } from '../../src/utils/deduplication';
+
+const sampleExisting = [
+  { date: '2023-01-01', amount: 29.99, description: 'Netflix subscription' },
+  { date: '2023-01-02', amount: 50.00, description: 'Grocery store purchase' },
+];
+
+describe('deduplicateTransactions', () => {
+  it('should flag exact duplicates', () => {
+    const incoming = [
+      { date: '2023-01-01', amount: 29.99, description: 'Netflix subscription' },
+    ];
+    const result = deduplicateTransactions(incoming, sampleExisting);
+    expect(result.unique).toHaveLength(0);
+    expect(result.duplicates).toHaveLength(1);
+  });
+
+  it('should ignore case and surrounding whitespace', () => {
+    const incoming = [
+      { date: '2023-01-01', amount: 29.99, description: '  NETFLIX Subscription ' },
+    ];
+    const result = deduplicateTransactions(incoming, sampleExisting);
+    expect(result.unique).toHaveLength(0);
+    expect(result.duplicates).toHaveLength(1);
+  });
+
+  it('should flag similar transactions above threshold', () => {
+    // Only the trailing " fee" differs from the stored record, which gives a
+    // Dice score of about 0.93.
+    const incoming = [
+      { date: '2023-01-01', amount: 29.99, description: 'Netflix subscription fee' },
+    ];
+    const result = deduplicateTransactions(incoming, sampleExisting, { threshold: 0.8 });
+    expect(result.unique).toHaveLength(0);
+    expect(result.duplicates).toHaveLength(1);
+  });
+
+  it('should keep distinct transactions', () => {
+    const incoming = [
+      { date: '2023-01-03', amount: 15.00, description: 'Coffee shop' },
+    ];
+    const result = deduplicateTransactions(incoming, sampleExisting);
+    expect(result.unique).toHaveLength(1);
+    expect(result.duplicates).toHaveLength(0);
+  });
+
+  it('should respect custom threshold', () => {
+    const incoming = [
+      { date: '2023-01-02', amount: 55.00, description: 'Grocery store' },
+    ];
+    const result = deduplicateTransactions(incoming, sampleExisting, { threshold: 0.95 });
+    expect(result.unique).toHaveLength(1);
+    expect(result.duplicates).toHaveLength(0);
+  });
+
+  it('should handle empty existing list', () => {
+    const incoming = [
+      { date: '2023-01-01', amount: 29.99, description: 'Netflix subscription' },
+    ];
+    const result = deduplicateTransactions(incoming, []);
+    expect(result.unique).toHaveLength(1);
+    expect(result.duplicates).toHaveLength(0);
+  });
+
+  it('should fall back to the default threshold when it is undefined', () => {
+    const incoming = [
+      { date: '2023-01-01', amount: 29.99, description: 'Netflix subscription' },
+    ];
+    const result = deduplicateTransactions(incoming, sampleExisting, { threshold: undefined });
+    expect(result.unique).toHaveLength(0);
+    expect(result.duplicates).toHaveLength(1);
+  });
+
+  it('should fall back to the default fields when the list is empty', () => {
+    const incoming = [
+      { date: '2023-01-03', amount: 15.00, description: 'Coffee shop' },
+    ];
+    const result = deduplicateTransactions(incoming, sampleExisting, { fields: [] });
+    expect(result.unique).toHaveLength(1);
+    expect(result.duplicates).toHaveLength(0);
+  });
+});