-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjustfile
45 lines (39 loc) · 1.25 KB
/
justfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Path handed to the parser recipes as --input-file.
input_file := 'data/wikipedia.xml'
# First recipe in the file, so it runs when `just` is invoked with no arguments:
# it prints the list of available recipes.
_default:
    just --list
# Shared cargo invocation used as the prefix of every binary command below.
cargo_run := 'cargo run --release'
# Command prefixes for the two binaries; the trailing `--` separates cargo's own
# arguments from the arguments forwarded to the binary.
parser := cargo_run + ' --bin parser -- '
subgraph-extractor := cargo_run + ' --bin subgraph-extractor -- '
# Writes output/links/data.jsonl and output/links/index.txt, the files that the
# extract-subgraph recipes read as input.
# The input path is quoted so that an overridden `input_file` containing
# whitespace is still passed to the parser as a single argument.
# Run the parser with the `links` extractor over `input_file`.
extract-links:
    {{parser}} \
    --extractor links \
    --input-file "{{input_file}}" \
    --output-data-file "output/links/data.jsonl" \
    --output-index-file "output/links/index.txt" \
    --input-file-threads 16
# Writes output/contents/data.jsonl and output/contents/index.txt.
# The input path is quoted so that an overridden `input_file` containing
# whitespace is still passed to the parser as a single argument.
# Run the parser with the `contents` extractor over `input_file`.
extract-contents:
    {{parser}} \
    --extractor contents \
    --input-file "{{input_file}}" \
    --output-data-file "output/contents/data.jsonl" \
    --output-index-file "output/contents/index.txt" \
    --input-file-threads 16
# Reads output/links/data.jsonl and output/links/index.txt (the paths written by
# extract-links) and writes the result to output/subgraph/<root>.txt.
# Run subgraph-extractor with the depth-limited method from page `root` to `depth`.
extract-subgraph root depth:
    {{subgraph-extractor}} --method depth-limited \
        --input-data-file "output/links/data.jsonl" --input-index-file "output/links/index.txt" \
        --output-file "output/subgraph/{{root}}.txt" --input-file-threads 16 \
        --root-page "{{root}}" --depth {{depth}}
# Reads output/links/data.jsonl and output/links/index.txt (the paths written by
# extract-links). The output file is named after the `root` argument
# (output/subgraph/<root>.txt), as in extract-subgraph; it was previously the
# literal path output/subgraph/root.txt, so every run overwrote the same file.
# Run subgraph-extractor (depth-limited) from `root`, passing `fanout-factor` as --fanout.
extract-subgraph-fanout root depth fanout-factor:
    {{subgraph-extractor}} \
    --method depth-limited \
    --input-data-file "output/links/data.jsonl" \
    --input-index-file "output/links/index.txt" \
    --output-file "output/subgraph/{{root}}.txt" \
    --input-file-threads 16 \
    --root-page "{{root}}" \
    --depth {{depth}} \
    --fanout {{fanout-factor}}