weblicht-pipelines.nix

{ alpinoTokenizerProtobuf
, syntaxdotModels }:
{
  # Use annotation models from the syntaxdot-models flake.
  annotators = with syntaxdotModels; {
    de-ud-huge = de-ud-huge;
    de-ud-large = de-ud-large;
    de-ud-large-albert = de-ud-large-albert;
    de-ud-medium = de-ud-medium;
    nl-ud-huge = nl-ud-huge;
    nl-ud-large = nl-ud-large;
    nl-ud-large-albert = nl-ud-large-albert;
    nl-ud-medium = nl-ud-medium;
  };

  # Pipelines. The annotator and tokenizer names should correspond
  # to those available through the `annotators` and `tokenizers`
  # attribute sets.
  pipelines = {
    de-ud-huge = {
      annotator = "de-ud-huge";
      tokenizer = "whitespace_tokenizer";
    };
    de-ud-large = {
      annotator = "de-ud-large";
      tokenizer = "whitespace_tokenizer";
    };
    de-ud-large-albert = {
      annotator = "de-ud-large-albert";
      tokenizer = "whitespace_tokenizer";
    };
    de-ud-medium = {
      annotator = "de-ud-medium";
      tokenizer = "whitespace_tokenizer";
    };
    nl-ud-huge = {
      annotator = "nl-ud-huge";
      tokenizer = "whitespace_tokenizer";
    };
    nl-ud-huge-tokenize = {
      annotator = "nl-ud-huge";
      tokenizer = "alpino_tokenizer";
    };
    nl-ud-large = {
      annotator = "nl-ud-large";
      tokenizer = "whitespace_tokenizer";
    };
    nl-ud-large-tokenize = {
      annotator = "nl-ud-large";
      tokenizer = "alpino_tokenizer";
    };
    nl-ud-large-albert = {
      annotator = "nl-ud-large-albert";
      tokenizer = "whitespace_tokenizer";
    };
    nl-ud-large-albert-tokenize = {
      annotator = "nl-ud-large-albert";
      tokenizer = "alpino_tokenizer";
    };
    nl-ud-medium = {
      annotator = "nl-ud-medium";
      tokenizer = "whitespace_tokenizer";
    };
    nl-ud-medium-tokenize = {
      annotator = "nl-ud-medium";
      tokenizer = "alpino_tokenizer";
    };
  };
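
  # Tokenizers, referenced by name from the `pipelines` attribute set
  # above. The Alpino tokenizer is configured with its protobuf definition.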
  tokenizers = {
    alpino_tokenizer = {
      alpino_tokenizer = "${alpinoTokenizerProtobuf}";
    };
    whitespace_tokenizer = "whitespace_tokenizer";
  };
}
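
To show how this expression fits together, here is a minimal, hypothetical sketch of evaluating it in isolation. The file name `sketch.nix` and the placeholder arguments are assumptions for illustration only; in the actual flake, `alpinoTokenizerProtobuf` and `syntaxdotModels` would come from the Alpino tokenizer protobuf and the syntaxdot-models flake referenced in the comments above.

# sketch.nix: a hypothetical caller of weblicht-pipelines.nix.
# Both arguments are placeholders, not the real flake inputs.
let
  weblichtPipelines = import ./weblicht-pipelines.nix {
    # Placeholder string; normally a path or derivation for the protobuf.
    alpinoTokenizerProtobuf = "/path/to/alpino-tokenizer.protobuf";
    # Placeholder model set; the real flake provides model derivations.
    syntaxdotModels = builtins.listToAttrs (map
      (name: { inherit name; value = "placeholder-${name}"; })
      [ "de-ud-huge" "de-ud-large" "de-ud-large-albert" "de-ud-medium"
        "nl-ud-huge" "nl-ud-large" "nl-ud-large-albert" "nl-ud-medium" ]);
  };
in
# Print the configured pipeline names, e.g. with:
#   nix-instantiate --eval --strict sketch.nix
builtins.attrNames weblichtPipelines.pipelines

Because the configuration is an ordinary Nix attribute set, adding a pipeline is just a matter of adding an entry to `pipelines` whose `annotator` and `tokenizer` values name existing entries in `annotators` and `tokenizers`.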