Skip to content

Commit

Permalink
Found a way to use more diffing algos
Browse files Browse the repository at this point in the history
  • Loading branch information
nikitaindik committed Nov 27, 2023
1 parent e4761fb commit be5cad2
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,39 @@
*/

import { findIndex, flatMap, flatten } from 'lodash';
import DiffMatchPatch, { Diff } from 'diff-match-patch';
import DiffMatchPatch from 'diff-match-patch';
import type { Diff } from 'diff-match-patch';
import 'diff-match-patch-line-and-word';
import * as diff from 'diff';
import type { Change } from 'diff';
import { isDelete, isInsert, isNormal, pickRanges } from 'react-diff-view';
import type { ChangeData, HunkData, RangeTokenNode, TokenizeEnhancer } from 'react-diff-view';

/** Signature shared by every string-based comparison function listed in `JsDiff`. */
type TextDiffer = (oldStr: string, newStr: string) => Change[];

/** Signature of the object-based comparison function listed in `JsDiff`. */
type ObjectDiffer = (
  oldObject: Record<string, unknown>,
  newObject: Record<string, unknown>
) => Change[];

/**
 * Typed view of the comparison functions this module reads from the "diff" namespace import.
 * Each property name doubles as a `DiffMethod` enum value, which is what allows
 * a method to be looked up dynamically by indexing with that value.
 */
interface JsDiff {
  diffChars: TextDiffer;
  diffWords: TextDiffer;
  diffWordsWithSpace: TextDiffer;
  diffLines: TextDiffer;
  diffTrimmedLines: TextDiffer;
  diffSentences: TextDiffer;
  diffCss: TextDiffer;
  diffJson: ObjectDiffer;
}

/** The "diff" namespace import, narrowed to the `JsDiff` shape. */
const jsDiff: JsDiff = diff;

/**
 * Diffing algorithms that can be selected for highlighting changes.
 *
 * Every value except the last one is the name of a property on the `JsDiff` interface,
 * so it can be used directly as a key into `jsDiff` to pick the comparison function.
 */
export enum DiffMethod {
CHARS = 'diffChars',
WORDS = 'diffWords',
WORDS_WITH_SPACE = 'diffWordsWithSpace',
LINES = 'diffLines',
TRIMMED_LINES = 'diffTrimmedLines',
SENTENCES = 'diffSentences',
CSS = 'diffCss',
JSON = 'diffJson',
// Not a `JsDiff` property name: `diffChangeBlock` routes this value to `diffByWord`
// instead of looking it up on `jsDiff`.
WORDS_CUSTOM_USING_DMP = 'diffWordsCustomUsingDmp',
}

const { DIFF_EQUAL, DIFF_DELETE, DIFF_INSERT } = DiffMatchPatch;

function findChangeBlocks(changes: ChangeData[]): ChangeData[][] {
Expand All @@ -31,6 +59,7 @@ function findChangeBlocks(changes: ChangeData[]): ChangeData[][] {

function groupDiffs(diffs: Diff[]): [Diff[], Diff[]] {
return diffs.reduce<[Diff[], Diff[]]>(
// eslint-disable-next-line @typescript-eslint/no-shadow
([oldDiffs, newDiffs], diff) => {
const [type] = diff;

Expand Down Expand Up @@ -100,6 +129,10 @@ function convertToLinesOfEdits(linesOfDiffs: Diff[][], startLineNumber: number)
return flatMap(linesOfDiffs, (diffs, i) => diffsToEdits(diffs, startLineNumber + i));
}

/*
UPDATE: I figured out that word-level diffing can be done without relying on "diff-match-patch-line-and-word".
See the new "diffBy" function below. This function is kept here for comparison.
*/
function diffByWord(x: string, y: string): [Diff[], Diff[]] {
/*
This is a modified version of "diffText" from react-diff-view.
Expand All @@ -122,7 +155,21 @@ function diffByWord(x: string, y: string): [Diff[], Diff[]] {
return groupDiffs(diffs);
}

function diffChangeBlock(changes: ChangeData[]): [RangeTokenNode[], RangeTokenNode[]] {
/**
 * Compares two strings with the algorithm selected by `diffMethod` and splits the
 * result into diffs for the old side and diffs for the new side.
 *
 * @param diffMethod - Algorithm to use. `DiffMethod.WORDS_CUSTOM_USING_DMP` is delegated
 *   to `diffByWord`; every other value is looked up by name on `jsDiff`.
 * @param x - Old text.
 * @param y - New text.
 * @returns A `[oldDiffs, newDiffs]` tuple as produced by `groupDiffs`, or two empty
 *   arrays when the comparison yields at most one diff.
 */
function diffBy(diffMethod: DiffMethod, x: string, y: string): [Diff[], Diff[]] {
  // "diffWordsCustomUsingDmp" is not a property of `jsDiff`, so indexing with it would
  // yield `undefined` and the call below would throw. Route it to the dedicated
  // implementation so this function is safe for every `DiffMethod` value.
  if (diffMethod === DiffMethod.WORDS_CUSTOM_USING_DMP) {
    return diffByWord(x, y);
  }

  const jsDiffChanges: Change[] = jsDiff[diffMethod](x, y);

  // Convert to the "diff-match-patch" tuple format so `groupDiffs` can be reused.
  const diffs: Diff[] = diff.convertChangesToDMP(jsDiffChanges);

  if (diffs.length <= 1) {
    return [[], []];
  }

  return groupDiffs(diffs);
}

function diffChangeBlock(
changes: ChangeData[],
diffMethod: DiffMethod
): [RangeTokenNode[], RangeTokenNode[]] {
/* Convert ChangeData array to two strings representing old source and new source of a change block, like
"created_at": "2023-11-20T16:47:52.801Z",
Expand All @@ -144,7 +191,10 @@ function diffChangeBlock(changes: ChangeData[]): [RangeTokenNode[], RangeTokenNo
['', '']
);

const [oldDiffs, newDiffs] = diffByWord(oldSource, newSource); // <-- That's basically the only change I made to allow word-level diffing
const [oldDiffs, newDiffs] =
diffMethod === DiffMethod.WORDS_CUSTOM_USING_DMP // <-- That's basically the only change I made to allow word-level diffing
? diffByWord(oldSource, newSource)
: diffBy(diffMethod, oldSource, newSource);

if (oldDiffs.length === 0 && newDiffs.length === 0) {
return [[], []];
Expand All @@ -170,20 +220,22 @@ function diffChangeBlock(changes: ChangeData[]): [RangeTokenNode[], RangeTokenNo
return [oldEdits, newEdits];
}

export function markEditsByWord(hunks: HunkData[]): TokenizeEnhancer {
export function markEditsBy(hunks: HunkData[], diffMethod: DiffMethod): TokenizeEnhancer {
const changeBlocks = flatMap(
hunks.map((hunk) => hunk.changes),
findChangeBlocks
);

const [oldEdits, newEdits] = changeBlocks.map(diffChangeBlock).reduce(
// eslint-disable-next-line @typescript-eslint/no-shadow
([oldEdits, newEdits], [currentOld, currentNew]) => [
oldEdits.concat(currentOld),
newEdits.concat(currentNew),
],
[[], []]
);
const [oldEdits, newEdits] = changeBlocks
.map((changes) => diffChangeBlock(changes, diffMethod))
.reduce(
// eslint-disable-next-line @typescript-eslint/no-shadow
([oldEdits, newEdits], [currentOld, currentNew]) => [
oldEdits.concat(currentOld),
newEdits.concat(currentNew),
],
[[], []]
);

return pickRanges(flatten(oldEdits), flatten(newEdits));
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,15 @@ import {
useGeneratedHtmlId,
useEuiTheme,
EuiSwitch,
EuiRadioGroup,
} from '@elastic/eui';
import type { RuleFieldsDiff } from '../../../../../common/api/detection_engine/prebuilt_rules/model/diff/rule_diff/rule_diff';
import type { RuleResponse } from '../../../../../common/api/detection_engine/model/rule_schema/rule_schemas.gen';
import { markEditsByWord } from './mark_edits_by_word';
import { markEditsBy, DiffMethod } from './mark_edits_by_word';

const HIDDEN_FIELDS = ['meta', 'rule_schedule', 'version'];

const DiffByWordEnabledContext = React.createContext(false);
const CompareMethodContext = React.createContext(DiffMethod.CHARS);

interface UnfoldProps extends Omit<DecorationProps, 'children'> {
start: number;
Expand Down Expand Up @@ -157,11 +158,7 @@ const useExpand = (hunks: HunkData[], oldSource: string, newSource: string) => {
};
};

const useTokens = (
hunks: HunkData[],
tokenizeChangesBy: 'chars' | 'words' = 'chars',
oldSource: string
) => {
const useTokens = (hunks: HunkData[], compareMethod: DiffMethod, oldSource: string) => {
if (!hunks) {
return undefined;
}
Expand All @@ -171,10 +168,12 @@ const useTokens = (
highlight: false,
enhancers: [
/*
"markEditsByWord" is a slightly modified version of "markEdits" enhancer from react-diff-view
"markEditsBy" is a slightly modified version of "markEdits" enhancer from react-diff-view
to enable word-level highlighting.
*/
tokenizeChangesBy === 'words' ? markEditsByWord(hunks) : markEdits(hunks, { type: 'block' }),
compareMethod === DiffMethod.CHARS
? markEdits(hunks, { type: 'block' }) // Using built-in "markEdits" enhancer for char-level diffing
: markEditsBy(hunks, compareMethod), // Using custom "markEditsBy" enhancer for other-level diffing
],
};

Expand Down Expand Up @@ -351,7 +350,8 @@ const CustomStyles = ({ children }: CustomStylesProps) => {
};

function DiffView({ oldSource, newSource }: DiffViewProps) {
const diffByWordEnabled = useContext(DiffByWordEnabledContext);
const compareMethod = useContext(CompareMethodContext);

/*
"react-diff-view" components consume diffs not as strings, but as something they call "hunks".
So we first need to convert our "before" and "after" strings into these "hunks".
Expand All @@ -378,7 +378,7 @@ function DiffView({ oldSource, newSource }: DiffViewProps) {
Here we go over each hunk and extract tokens from it. For example, splitting strings into words,
so we can later highlight changes on a word-by-word basis vs line-by-line.
*/
const tokens = useTokens(hunks, diffByWordEnabled ? 'words' : 'chars', oldSource);
const tokens = useTokens(hunks, compareMethod, oldSource);

return (
<Diff
Expand Down Expand Up @@ -543,29 +543,55 @@ export const RuleDiffTabReactDiffView = ({ fields, oldRule, newRule }: RuleDiffT
}));
};

const [diffByWordEnabled, setDiffByWordEnabled] = useState(false);
const options = [
{
id: DiffMethod.CHARS,
label: 'Chars',
},
{
id: DiffMethod.WORDS,
label: 'Words',
},
{
id: DiffMethod.WORDS_CUSTOM_USING_DMP,
label: 'Words, alternative method (using "diff-match-patch" library)',
},
{
id: DiffMethod.LINES,
label: 'Lines',
},
{
id: DiffMethod.SENTENCES,
label: 'Sentences',
},
];

const [compareMethod, setCompareMethod] = useState<DiffMethod>(DiffMethod.CHARS);

return (
<>
<EuiSpacer size="m" />
<EuiRadioGroup
options={options}
idSelected={compareMethod}
onChange={(optionId) => {
setCompareMethod(optionId as DiffMethod);
}}
legend={{
children: <span>{'Diffing algorthm'}</span>,
}}
/>
<EuiSpacer size="m" />
<CustomStyles>
<EuiSwitch
label="Diff by word"
checked={diffByWordEnabled}
onChange={() => {
setDiffByWordEnabled(!diffByWordEnabled);
}}
/>
<EuiSpacer size="m" />
<DiffByWordEnabledContext.Provider value={diffByWordEnabled}>
<CompareMethodContext.Provider value={compareMethod}>
<WholeObjectDiff
oldRule={oldRule}
newRule={newRule}
openSections={openSections}
toggleSection={toggleSection}
/>
<Fields fields={fields} openSections={openSections} toggleSection={toggleSection} />
</DiffByWordEnabledContext.Provider>
</CompareMethodContext.Provider>
</CustomStyles>
</>
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,15 @@ const Fields = ({ fields, openSections, toggleSection }: FieldsProps) => {
const currentVersion: string = get(fields, [fieldName, 'current_version'], '');
const mergedVersion: string = get(fields, [fieldName, 'merged_version'], '');

const oldSource = JSON.stringify(currentVersion, null, 2);
const newSource = JSON.stringify(mergedVersion, null, 2);
const oldSource =
compareMethod === DiffMethod.JSON && typeof currentVersion === 'object'
? currentVersion
: JSON.stringify(currentVersion, null, 2);

// Pass the raw value through only when the JSON method is selected and the merged
// version itself is an object; otherwise serialize it to a string. The type check
// must inspect `mergedVersion` (the value actually being returned), not `currentVersion`.
const newSource =
  compareMethod === DiffMethod.JSON && typeof mergedVersion === 'object'
    ? mergedVersion
    : JSON.stringify(mergedVersion, null, 2);

return (
<>
Expand Down Expand Up @@ -164,11 +171,19 @@ const WholeObjectDiff = ({
openSections,
toggleSection,
}: WholeObjectDiffProps) => {
const oldSource = sortAndStringifyJson(oldRule);
const newSource = sortAndStringifyJson(newRule);
const compareMethod = useContext(CompareMethodContext);

const oldSource =
compareMethod === DiffMethod.JSON && typeof oldRule === 'object'
? oldRule
: sortAndStringifyJson(oldRule);

const newSource =
compareMethod === DiffMethod.JSON && typeof newRule === 'object'
? newRule
: sortAndStringifyJson(newRule);

const styles = useContext(CustomStylesContext);
const compareMethod = useContext(CompareMethodContext);

return (
<>
Expand Down

0 comments on commit be5cad2

Please sign in to comment.