Skip to content

Commit e4f8029

Browse files
committed
fix ucsur mapping
1 parent 70d63f7 commit e4f8029

File tree

2 files changed

+178
-144
lines changed

2 files changed

+178
-144
lines changed

src/parser/test.ts

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
// This code is Deno only
22

3+
import { assertEquals } from "@std/assert/equals";
34
import { assertNotEquals } from "@std/assert/not-equals";
45
import { assertThrows } from "@std/assert/throws";
5-
import { parse } from "./parser.ts";
66
import { EXAMPLE_SENTENCES, MALFORMED_SENTENCES } from "../examples.ts";
7+
import { parse } from "./parser.ts";
8+
import { KU_LILI, KU_SULI, PU } from "./ucsur.ts";
9+
import { assert } from "@std/assert/assert";
710

811
Deno.test("AST all distinct", () => {
912
for (const sentence of EXAMPLE_SENTENCES) {
@@ -20,6 +23,27 @@ Deno.test("parser all error", () => {
2023
}
2124
});
2225

26+
// Pins the size of every word list. UCSUR code points are derived from an
// entry's index in these lists, so an accidental insertion or deletion would
// silently shift every later mapping.
Deno.test("ucsur have proper length", () => {
  assertEquals(PU.length, 120);
  assertEquals(KU_SULI.length, 17);
  assertEquals(KU_LILI.length, 4);
});
31+
32+
// Every PU entry must sort strictly before the entry that follows it. The
// comparison is strict, so a duplicated word fails the test as well.
Deno.test("ucsur ordered", () => {
  let previous: string | undefined;
  for (const word of PU) {
    if (previous !== undefined) {
      assert(previous < word, `error between ${previous} and ${word}`);
    }
    previous = word;
  }
});
40+
41+
// PU contains "ale"; this asserts that "ali" is never added as an entry, which
// would take up an index and shift the code points of every word after it.
Deno.test("no ali", () => {
  for (const word of PU) {
    assertNotEquals(word, "ali");
  }
});
46+
2347
function uniquePairs<T>(
2448
array: ReadonlyArray<T>,
2549
): ReadonlyArray<readonly [T, T]> {

src/parser/ucsur.ts

Lines changed: 153 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -33,148 +33,158 @@ export const SPECIAL_UCSUR_DESCRIPTIONS: Map<string, string> = new Map(
3333
[UCSUR_COLON]: "colon",
3434
}),
3535
);
36-
export const UCSUR_TO_LATIN: Map<string, string> = new Map(Object.entries({
37-
"\u{F1900}": "a",
38-
"\u{F1901}": "akesi",
39-
"\u{F1902}": "ala",
40-
"\u{F1903}": "alasa",
41-
"\u{F1904}": "ale",
42-
"\u{F1905}": "anpa",
43-
"\u{F1906}": "ante",
44-
"\u{F1907}": "anu",
45-
"\u{F1908}": "awen",
46-
"\u{F1909}": "e",
47-
"\u{F190A}": "en",
48-
"\u{F190B}": "esun",
49-
"\u{F190C}": "ijo",
50-
"\u{F190D}": "ike",
51-
"\u{F190E}": "ilo",
52-
"\u{F190F}": "insa",
53-
"\u{F1910}": "jaki",
54-
"\u{F1911}": "jan",
55-
"\u{F1912}": "jelo",
56-
"\u{F1913}": "jo",
57-
"\u{F1914}": "kala",
58-
"\u{F1915}": "kalama",
59-
"\u{F1916}": "kama",
60-
"\u{F1917}": "kasi",
61-
"\u{F1918}": "ken",
62-
"\u{F1919}": "kepeken",
63-
"\u{F191A}": "kili",
64-
"\u{F191B}": "kiwen",
65-
"\u{F191C}": "ko",
66-
"\u{F191D}": "kon",
67-
"\u{F191E}": "kule",
68-
"\u{F191F}": "kulupu",
69-
"\u{F1920}": "kute",
70-
"\u{F1921}": "la",
71-
"\u{F1922}": "lape",
72-
"\u{F1923}": "laso",
73-
"\u{F1924}": "lawa",
74-
"\u{F1925}": "len",
75-
"\u{F1926}": "lete",
76-
"\u{F1927}": "li",
77-
"\u{F1928}": "lili",
78-
"\u{F1929}": "linja",
79-
"\u{F192A}": "lipu",
80-
"\u{F192B}": "loje",
81-
"\u{F192C}": "lon",
82-
"\u{F192D}": "luka",
83-
"\u{F192E}": "lukin",
84-
"\u{F192F}": "lupa",
85-
"\u{F1930}": "ma",
86-
"\u{F1931}": "mama",
87-
"\u{F1932}": "mani",
88-
"\u{F1933}": "meli",
89-
"\u{F1934}": "mi",
90-
"\u{F1935}": "mije",
91-
"\u{F1936}": "moku",
92-
"\u{F1937}": "moli",
93-
"\u{F1938}": "monsi",
94-
"\u{F1939}": "mu",
95-
"\u{F193A}": "mun",
96-
"\u{F193B}": "musi",
97-
"\u{F193C}": "mute",
98-
"\u{F193D}": "nanpa",
99-
"\u{F193E}": "nasa",
100-
"\u{F193F}": "nasin",
101-
"\u{F1940}": "lupa",
102-
"\u{F1941}": "ni",
103-
"\u{F1942}": "nimi",
104-
"\u{F1943}": "noka",
105-
"\u{F1944}": "o",
106-
"\u{F1945}": "olin",
107-
"\u{F1946}": "ona",
108-
"\u{F1947}": "open",
109-
"\u{F1948}": "pakala",
110-
"\u{F1949}": "pali",
111-
"\u{F194A}": "palisa",
112-
"\u{F194B}": "pan",
113-
"\u{F194C}": "pana",
114-
"\u{F194D}": "pi",
115-
"\u{F194E}": "pilin",
116-
"\u{F194F}": "pimeja",
117-
"\u{F1950}": "pini",
118-
"\u{F1951}": "pipi",
119-
"\u{F1952}": "poka",
120-
"\u{F1953}": "poki",
121-
"\u{F1954}": "pona",
122-
"\u{F1955}": "pu",
123-
"\u{F1956}": "sama",
124-
"\u{F1957}": "seli",
125-
"\u{F1958}": "selo",
126-
"\u{F1959}": "seme",
127-
"\u{F195A}": "sewi",
128-
"\u{F195B}": "sijelo",
129-
"\u{F195C}": "sike",
130-
"\u{F195D}": "sin",
131-
"\u{F195E}": "sina",
132-
"\u{F195F}": "sinpin",
133-
"\u{F1960}": "sitelen",
134-
"\u{F1961}": "sona",
135-
"\u{F1962}": "soweli",
136-
"\u{F1963}": "suli",
137-
"\u{F1964}": "suno",
138-
"\u{F1965}": "supa",
139-
"\u{F1966}": "suwi",
140-
"\u{F1967}": "tan",
141-
"\u{F1968}": "taso",
142-
"\u{F1969}": "tawa",
143-
"\u{F196A}": "telo",
144-
"\u{F196B}": "tenpo",
145-
"\u{F196C}": "toki",
146-
"\u{F196D}": "tomo",
147-
"\u{F196E}": "tu",
148-
"\u{F196F}": "unpa",
149-
"\u{F1970}": "uta",
150-
"\u{F1971}": "utala",
151-
"\u{F1972}": "walo",
152-
"\u{F1973}": "wan",
153-
"\u{F1974}": "waso",
154-
"\u{F1975}": "wawa",
155-
"\u{F1976}": "weka",
156-
"\u{F1977}": "wile",
157-
"\u{F1978}": "namako",
158-
"\u{F1979}": "kin",
159-
"\u{F197A}": "oko",
160-
"\u{F197B}": "kipisi",
161-
"\u{F197C}": "leko",
162-
"\u{F197D}": "monsuta",
163-
"\u{F197E}": "tonsi",
164-
"\u{F197F}": "jasima",
165-
"\u{F1980}": "kijetesantakalu",
166-
"\u{F1981}": "soko",
167-
"\u{F1982}": "meso",
168-
"\u{F1983}": "epiku",
169-
"\u{F1984}": "kokosila",
170-
"\u{F1985}": "lanpan",
171-
"\u{F1986}": "n",
172-
"\u{F1987}": "misikeke",
173-
"\u{F1988}": "ku",
174-
"\u{F19A0}": "pake",
175-
"\u{F19A1}": "apeja",
176-
"\u{F19A2}": "majuna",
177-
"\u{F19A3}": "powe",
178-
}));
36+
/**
 * First block of words, in ascending order.
 *
 * Position matters: `UCSUR_TO_LATIN` assigns the entry at index `i` to code
 * point U+F1900 + `i`, so entries must never be inserted, removed, or
 * reordered without intending to change the mapping.
 */
export const PU = [
  "a",
  "akesi",
  "ala",
  "alasa",
  "ale",
  "anpa",
  "ante",
  "anu",
  "awen",
  "e",
  "en",
  "esun",
  "ijo",
  "ike",
  "ilo",
  "insa",
  "jaki",
  "jan",
  "jelo",
  "jo",
  "kala",
  "kalama",
  "kama",
  "kasi",
  "ken",
  "kepeken",
  "kili",
  "kiwen",
  "ko",
  "kon",
  "kule",
  "kulupu",
  "kute",
  "la",
  "lape",
  "laso",
  "lawa",
  "len",
  "lete",
  "li",
  "lili",
  "linja",
  "lipu",
  "loje",
  "lon",
  "luka",
  "lukin",
  "lupa",
  "ma",
  "mama",
  "mani",
  "meli",
  "mi",
  "mije",
  "moku",
  "moli",
  "monsi",
  "mu",
  "mun",
  "musi",
  "mute",
  "nanpa",
  "nasa",
  "nasin",
  "nena",
  "ni",
  "nimi",
  "noka",
  "o",
  "olin",
  "ona",
  "open",
  "pakala",
  "pali",
  "palisa",
  "pan",
  "pana",
  "pi",
  "pilin",
  "pimeja",
  "pini",
  "pipi",
  "poka",
  "poki",
  "pona",
  "pu",
  "sama",
  "seli",
  "selo",
  "seme",
  "sewi",
  "sijelo",
  "sike",
  "sin",
  "sina",
  "sinpin",
  "sitelen",
  "sona",
  "soweli",
  "suli",
  "suno",
  "supa",
  "suwi",
  "tan",
  "taso",
  "tawa",
  "telo",
  "tenpo",
  "toki",
  "tomo",
  "tu",
  "unpa",
  "uta",
  "utala",
  "walo",
  "wan",
  "waso",
  "wawa",
  "weka",
  "wile",
];
158+
/**
 * Second block of words. `UCSUR_TO_LATIN` numbers these immediately after
 * `PU`, continuing from the same U+F1900 base.
 *
 * The order is deliberately NOT alphabetical: each entry's index fixes its
 * code point, so the sequence must be kept exactly as listed.
 */
export const KU_SULI = [
  "namako",
  "kin",
  "oko",
  "kipisi",
  "leko",
  "monsuta",
  "tonsi",
  "jasima",
  "kijetesantakalu",
  "soko",
  "meso",
  "epiku",
  "kokosila",
  "lanpan",
  "n",
  "misikeke",
  "ku",
];
177+
/**
 * Third block of words. `UCSUR_TO_LATIN` numbers these from their own base,
 * U+F19A0, rather than continuing after `KU_SULI`.
 *
 * As with the other lists, an entry's index determines its code point, so the
 * order must be preserved.
 */
export const KU_LILI = [
  "pake",
  "apeja",
  "majuna",
  "powe",
];
183+
/**
 * Maps a single UCSUR character to its Latin-script word.
 *
 * Keys are computed from list positions instead of being written out by hand:
 * - `PU` followed by `KU_SULI` occupy consecutive code points starting at
 *   U+F1900;
 * - `KU_LILI` occupies consecutive code points starting at U+F19A0.
 */
export const UCSUR_TO_LATIN: Map<string, string> = new Map([
  // The index runs across both lists, so KU_SULI continues right after PU.
  ...[...PU, ...KU_SULI]
    .map((latin, i) => [String.fromCodePoint(0xF1900 + i), latin] as const),
  // Separate base: the index restarts at 0 for this list.
  ...KU_LILI
    .map((latin, i) => [String.fromCodePoint(0xF19A0 + i), latin] as const),
]);
179189
/**
 * Matches one code point in U+F1900–U+F1988 or U+F19A0–U+F19A3.
 *
 * The `u` flag is required so the `\u{...}` escapes are read as whole code
 * points rather than as surrogate halves.
 */
export const UCSUR_CHARACTER_REGEX =
  /[\u{F1900}-\u{F1988}\u{F19A0}-\u{F19A3}]/u;

0 commit comments

Comments
 (0)