-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathapertium-tat.tat.mb.rlx
157 lines (112 loc) · 3.07 KB
/
apertium-tat.tat.mb.rlx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Constraint grammar disambiguation rules for translating Tatar metrical books.
# Invoked after apertium-tat.tat.rlx to resolve all remaining ambiguity
# (reduce it to 0) in a way that suits this particular domain.
# ============================= #
# Window delimiters and options #
# ============================= #
# Hard delimiters: word forms that end a window.
DELIMITERS =
    "<.>" "<!>" "<?>"
    "<...>" "<¶>"
    ;
# Soft delimiter: the comma word form.
SOFT-DELIMITERS = "<,>" ;
# Sub-reading direction is set to LTR.
SUBREADINGS     = LTR ;
# ==================== #
# Tag lists and sets   #
# ==================== #
SETS
# Window edges, expressed through the >>> and <<< tags.
LIST BOS   = (>>>) ;
LIST EOS   = (<<<) ;
# Part-of-speech (first-level) tags
# =================================
# One single-tag LIST per first-level tag, so that rules and SETs can refer
# to a set name rather than to a parenthesised tag.
LIST A         = adj ;
LIST Adv       = adv ;
LIST Pron      = prn ;
LIST N         = n ;
LIST Prop      = np ;
LIST V         = v ;
LIST Vaux      = vaux ;
LIST Cop       = cop ;
LIST Det       = det ;
LIST CC        = cnjcoo ;
LIST CS        = cnjsub ;
LIST Interj    = ij ;
LIST Num       = num ;
LIST Post      = post ;
LIST Postadv   = postadv ;
LIST Mod_ass   = mod_ass ;
LIST Sent      = sent ;
LIST Cm        = cm ;
LIST Rquot     = rquot ;
# Sub-categories of the parts of speech
# =====================================
LIST Pers     = pers ;
LIST Dem      = dem ;
LIST Interr   = itg ;
LIST Cog      = cog ;
LIST Ant      = ant ;
# "Syntactic" tags (advl / attr / subst)
# ======================================
LIST Advl    = advl ;
LIST Attr    = attr ;
LIST Subst   = subst ;
# Morphosyntactic properties
# ==========================
# Tip: if you keep writing the same tag in parentheses in rules, e.g. (nom),
# give it a named LIST here and use the name instead.
#
# Number
LIST Sg     = sg ;
LIST Pl     = pl ;
# Case
LIST Nom    = nom ;
LIST Gen    = gen ;
LIST Dat    = dat ;
LIST Acc    = acc ;
LIST Abl    = abl ;
LIST Loc    = loc ;
# Person
LIST P1     = p1 ;
LIST P2     = p2 ;
LIST P3     = p3 ;
# Possessive tags px1sg, px2sg, px3sp, px1pl and px2pl, grouped in one list
LIST PersonalPossessives = px1sg px2sg px3sp px1pl px2pl ;
# Remaining single-tag lists
LIST Pres   = pres ;
LIST Inf    = inf ;
LIST TV     = tv ;
LIST Opt    = opt ;
LIST Imp    = imp ;
LIST Coop   = coop ;
LIST Frm    = frm ;
# Postposition lemmas
# ===================
# Matches readings whose base form is "күрә"; going by the set name, these are
# postpositions that govern the dative.
LIST PostGoverningDat   = "күрә" ;
# Verb-form lists
# ===============
# Multi-tag lists group several verb-form tags under one name; the single-tag
# lists below them name individual tags.
LIST FiniteVerb     = pres past ifi fut fut2 imp opt ;
LIST Gerund         = ger ger_ppot ger_past ger_perf ger_impf ger_abs ;
LIST Ger6           = ger6 ;
LIST Ger_past       = ger_past ;
LIST Participle     = prc_perf prc_impf prc_cond prc_vol prc_plan ;
LIST Prc_impf       = prc_impf ;
LIST VerbalAdverb   = gna_perf gna_cond gna_until gna_after ;
LIST Gna_cond       = gna_cond ;
LIST Gpr_fut        = gpr_fut ;
LIST Gpr_past       = gpr_past ;
# Readings of the lemmas "бар", "юк" and "күп" that carry the adj tag.
LIST Bar-etc        = ("бар" adj) ("юк" adj) ("күп" adj) ;
# Word-category unions
# ====================
# Each SET below is a plain union (OR) of the lists defined earlier and of a
# few inline tag sets.
SET WORD       = N OR V OR A OR Post OR Postadv OR Pron OR Det OR Adv
                 OR CC OR CS OR Interj OR Num OR ("\?") ;
SET PRE-N      = A OR Det OR Postadv OR Num OR (n gen) OR (prn gen) OR CC ;
SET MARK       = Cm OR ("\\") OR ("\;") ;
SET WORDMARK   = WORD OR MARK ;
SET N-MOD      = A OR Det OR Num OR (n gen) OR (prn gen) ;
SET ADJ-MOD    = Postadv OR Adv ;
SET NPMARK     = N-MOD OR ADJ-MOD ;
SET NOMINAL    = N OR Prop OR Pron OR Subst OR Gerund ;
# Categories that cannot be part of a noun phrase
# ===============================================
# Each set is WORDMARK with some categories subtracted.
SET NPNH      = WORDMARK - PRE-N ;          # everything except PRE-N
SET NPNHA     = WORDMARK - PRE-N - Adv ;    # ... and also except Adv
SET NOT-ADV   = WORDMARK - Adv ;            # everything except Adv
# Barrier sets
# ============
# Union of CS, Interr and the end-of-window set EOS.
SET S-BOUNDARY   = CS OR Interr OR EOS ;
SECTION
# Word-form-specific rules: each one applies only to the quoted surface form.
# The lower-case and capitalised spellings of the first form get separate rules.
"<атасы>"    SELECT N ;
"<Атасы>"    SELECT N ;
# Keep the N reading whose base form is the quoted lemma.
"<яшендә>"   SELECT N + ("яшь") ;
"<исем>"     SELECT N + ("исем") ;
"<йөз>"      SELECT Num ;
# Unconditional rule: select the Prop reading wherever one is available.
# It must stay after the word-form rules above, since rule order matters.
# A contextual test for this rule is currently disabled:
#     IF (0 ("[:upper:]+[:lower:]*"r))
SELECT Prop ;