File tree
26 files changed
+31
-44
lines changed- ac_dc
- deduplicate
- visualization
- bertin
- evaluation
- mc4
- utils
- cc_pseudo_crawl/python_scripts
- extract_text
- kenlm_training
- cc_net
- tests
- pii-manager
- src/pii_manager/api
- test/unit
- api
- helper
- tokenizer/python_script
26 files changed
+31
-44
lines changedDiff for: ac_dc/anonymization.py
+1-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
30 | 30 |
| |
31 | 31 |
| |
32 | 32 |
| |
33 |
| - | |
| 33 | + | |
34 | 34 |
| |
35 | 35 |
| |
36 | 36 |
|
Diff for: ac_dc/deduplicate/self_deduplicate.py
+1-4
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 | 1 |
| |
2 |
| - | |
3 | 2 |
| |
4 | 3 |
| |
5 | 4 |
| |
| |||
27 | 26 |
| |
28 | 27 |
| |
29 | 28 |
| |
30 |
| - | |
31 |
| - | |
| 29 | + | |
32 | 30 |
| |
33 | 31 |
| |
34 | 32 |
| |
| |||
201 | 199 |
| |
202 | 200 |
| |
203 | 201 |
| |
204 |
| - | |
205 | 202 |
|
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
21 | 21 |
| |
22 | 22 |
| |
23 | 23 |
| |
24 |
| - | |
25 | 24 |
| |
26 | 25 |
| |
27 | 26 |
| |
| |||
166 | 165 |
| |
167 | 166 |
| |
168 | 167 |
| |
169 |
| - | |
170 | 168 |
| |
171 | 169 |
| |
172 | 170 |
| |
|
Diff for: ac_dc/visualization/visualization.py
-2
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
625 | 625 |
| |
626 | 626 |
| |
627 | 627 |
| |
628 |
| - | |
629 | 628 |
| |
630 | 629 |
| |
631 | 630 |
| |
| |||
698 | 697 |
| |
699 | 698 |
| |
700 | 699 |
| |
701 |
| - | |
702 | 700 |
| |
703 | 701 |
| |
704 | 702 |
| |
|
Diff for: bertin/evaluation/run_glue.py
-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 | 1 |
| |
2 |
| - | |
3 | 2 |
| |
4 | 3 |
| |
5 | 4 |
| |
|
Diff for: bertin/evaluation/run_ner.py
-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 | 1 |
| |
2 |
| - | |
3 | 2 |
| |
4 | 3 |
| |
5 | 4 |
| |
|
Diff for: bertin/mc4/mc4.py
+1-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
404 | 404 |
| |
405 | 405 |
| |
406 | 406 |
| |
407 |
| - | |
| 407 | + | |
408 | 408 |
| |
409 | 409 |
| |
410 | 410 |
| |
|
Diff for: bertin/run_mlm_flax.py
-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 | 1 |
| |
2 |
| - | |
3 | 2 |
| |
4 | 3 |
| |
5 | 4 |
| |
|
Diff for: bertin/run_mlm_flax_stream.py
+1-2
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
1 | 1 |
| |
2 |
| - | |
3 | 2 |
| |
4 | 3 |
| |
5 | 4 |
| |
| |||
446 | 445 |
| |
447 | 446 |
| |
448 | 447 |
| |
449 |
| - | |
| 448 | + | |
450 | 449 |
| |
451 | 450 |
| |
452 | 451 |
| |
|
Diff for: bertin/utils/dataset_perplexity.py
+1-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
17 | 17 |
| |
18 | 18 |
| |
19 | 19 |
| |
20 |
| - | |
| 20 | + | |
21 | 21 |
| |
22 | 22 |
| |
23 | 23 |
|
+2-2
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
143 | 143 |
| |
144 | 144 |
| |
145 | 145 |
| |
146 |
| - | |
| 146 | + | |
147 | 147 |
| |
148 |
| - | |
| 148 | + | |
149 | 149 |
| |
150 | 150 |
| |
151 | 151 |
| |
|
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
431 | 431 |
| |
432 | 432 |
| |
433 | 433 |
| |
434 |
| - | |
435 | 434 |
| |
436 | 435 |
| |
437 | 436 |
| |
|
+1-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
21 | 21 |
| |
22 | 22 |
| |
23 | 23 |
| |
24 |
| - | |
| 24 | + | |
25 | 25 |
| |
26 | 26 |
| |
27 | 27 |
| |
|
Diff for: kenlm_training/cc_net/execution.py
-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
42 | 42 |
| |
43 | 43 |
| |
44 | 44 |
| |
45 |
| - | |
46 | 45 |
| |
47 | 46 |
| |
48 | 47 |
| |
|
Diff for: kenlm_training/cc_net/jsonql.py
+3-4
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
880 | 880 |
| |
881 | 881 |
| |
882 | 882 |
| |
883 |
| - | |
884 |
| - | |
| 883 | + | |
885 | 884 |
| |
886 | 885 |
| |
887 | 886 |
| |
| |||
961 | 960 |
| |
962 | 961 |
| |
963 | 962 |
| |
964 |
| - | |
| 963 | + | |
965 | 964 |
| |
966 | 965 |
| |
967 | 966 |
| |
| |||
1015 | 1014 |
| |
1016 | 1015 |
| |
1017 | 1016 |
| |
1018 |
| - | |
| 1017 | + | |
1019 | 1018 |
| |
1020 | 1019 |
| |
1021 | 1020 |
| |
|
Diff for: kenlm_training/tests/test_jsonql.py
+1-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
262 | 262 |
| |
263 | 263 |
| |
264 | 264 |
| |
265 |
| - | |
| 265 | + | |
266 | 266 |
| |
267 | 267 |
| |
268 | 268 |
| |
|
Diff for: pii-manager/setup.py
+2-2
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
27 | 27 |
| |
28 | 28 |
| |
29 | 29 |
| |
30 |
| - | |
| 30 | + | |
31 | 31 |
| |
32 | 32 |
| |
33 | 33 |
| |
34 | 34 |
| |
35 | 35 |
| |
36 | 36 |
| |
37 | 37 |
| |
38 |
| - | |
| 38 | + | |
39 | 39 |
| |
40 | 40 |
| |
41 | 41 |
| |
|
+3-6
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
31 | 31 |
| |
32 | 32 |
| |
33 | 33 |
| |
34 |
| - | |
35 |
| - | |
| 34 | + | |
36 | 35 |
| |
37 | 36 |
| |
38 | 37 |
| |
39 |
| - | |
40 |
| - | |
| 38 | + | |
41 | 39 |
| |
42 | 40 |
| |
43 | 41 |
| |
44 | 42 |
| |
45 | 43 |
| |
46 | 44 |
| |
47 | 45 |
| |
48 |
| - | |
49 |
| - | |
| 46 | + | |
50 | 47 |
| |
51 | 48 |
| |
52 | 49 |
| |
|
Diff for: pii-manager/test/unit/api/test_file.py
+1-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
12 | 12 |
| |
13 | 13 |
| |
14 | 14 |
| |
15 |
| - | |
| 15 | + | |
16 | 16 |
| |
17 | 17 |
| |
18 | 18 |
| |
|
+1-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
14 | 14 |
| |
15 | 15 |
| |
16 | 16 |
| |
17 |
| - | |
| 17 | + | |
18 | 18 |
| |
19 | 19 |
| |
20 | 20 |
| |
|
+4-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
21 | 21 |
| |
22 | 22 |
| |
23 | 23 |
| |
24 |
| - | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
25 | 28 |
| |
26 | 29 |
| |
27 | 30 |
| |
|
+2-2
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
47 | 47 |
| |
48 | 48 |
| |
49 | 49 |
| |
50 |
| - | |
| 50 | + | |
51 | 51 |
| |
52 | 52 |
| |
53 | 53 |
| |
| |||
86 | 86 |
| |
87 | 87 |
| |
88 | 88 |
| |
89 |
| - | |
| 89 | + | |
90 | 90 |
| |
91 | 91 |
|
+2-2
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
38 | 38 |
| |
39 | 39 |
| |
40 | 40 |
| |
41 |
| - | |
| 41 | + | |
42 | 42 |
| |
43 | 43 |
| |
44 | 44 |
| |
| |||
64 | 64 |
| |
65 | 65 |
| |
66 | 66 |
| |
67 |
| - | |
| 67 | + | |
68 | 68 |
| |
69 | 69 |
|
+2-2
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
74 | 74 |
| |
75 | 75 |
| |
76 | 76 |
| |
77 |
| - | |
| 77 | + | |
78 | 78 |
| |
79 | 79 |
| |
80 | 80 |
| |
| |||
83 | 83 |
| |
84 | 84 |
| |
85 | 85 |
| |
86 |
| - | |
| 86 | + | |
87 | 87 |
| |
88 | 88 |
| |
89 | 89 |
| |
|
+1-1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
8 | 8 |
| |
9 | 9 |
| |
10 | 10 |
| |
11 |
| - | |
| 11 | + | |
12 | 12 |
|
Diff for: tokenizer/python_script/dedup_lines.py
+1
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
28 | 28 |
| |
29 | 29 |
| |
30 | 30 |
| |
| 31 | + | |
31 | 32 |
| |
32 | 33 |
| |
33 | 34 |
| |
|
0 commit comments