Skip to content

Commit 312844a

Browse files
committed
Fixed arabic stuff
1 parent ca96d99 commit 312844a

File tree

4 files changed

+56
-36
lines changed

4 files changed

+56
-36
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ set(SOURCE_FILES_UNITTEST
2222
set(SOURCE_FILES_UTILITY
2323
utility/mapcode.cpp)
2424

25-
add_executable(all ${SOURCE_FILES})
25+
add_executable(fullset ${SOURCE_FILES})
2626

2727
add_executable(unittest ${SOURCE_FILES_UNITTEST})
2828

mapcodelib/mapcoder.c

100644 → 100755
Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,6 +1748,7 @@ static int decoderEngine(decodeRec *dec) {
17481748
// WARNING - these alphabets have NOT yet been released as standard! use at your own risk! check www.mapcode.com for details.
17491749
static UWORD asc2lan[MAPCODE_ALPHABETS_TOTAL][36] = // A-Z equivalents for ascii characters A to Z, 0-9
17501750
{
1751+
// Character: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 0 1 2 3 4 5 6 7 8 9
17511752
{0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039}, // roman
17521753
{0x0391, 0x0392, 0x039e, 0x0394, 0x0388, 0x0395, 0x0393, 0x0397, 0x0399, 0x03a0, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f, 0x03a1, 0x0398, 0x03a8, 0x03a3, 0x03a4, 0x0389, 0x03a6, 0x03a9, 0x03a7, 0x03a5, 0x0396, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039}, // greek
17531754
{0x0410, 0x0412, 0x0421, 0x0414, 0x0415, 0x0416, 0x0413, 0x041d, 0x0418, 0x041f, 0x041a, 0x041b, 0x041c, 0x0417, 0x041e, 0x0420, 0x0424, 0x042f, 0x0426, 0x0422, 0x042d, 0x0427, 0x0428, 0x0425, 0x0423, 0x0411, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039}, // cyrillic
@@ -1907,15 +1908,35 @@ UWORD *convertToAlphabet(UWORD *unibuf, int maxlength, const char *mapcode, int
19071908
*unibuf = 0;
19081909
return startbuf;
19091910
}
1910-
*unibuf++ = *mapcode++;
1911+
*unibuf++ = (UWORD) *mapcode++;
19111912
}
19121913
}
19131914
}
19141915

1915-
if (alphabet == 1 || alphabet == 3 || alphabet == 14) {
1916+
if (alphabet == 1 || alphabet == 3 || alphabet == 14) { // greek hebrew arabic
19161917
mapcode = convertToAbjad(u, mapcode, USIZE);
19171918
}
19181919

1920+
// re-pack E/U-voweled mapcodes when necessary:
1921+
if (alphabet == 1) { // alphabet has no letter E (greek!)
1922+
if (strchr(mapcode, 'E') || strchr(mapcode, 'U') ||
1923+
strchr(mapcode, 'e') || strchr(mapcode, 'u')) {
1924+
// copy trimmed mapcode into temporary buffer u
1925+
int len = (int) strlen(mapcode);
1926+
if (len < MAX_MAPCODE_RESULT_LEN) {
1927+
while (len > 0 && mapcode[len - 1] > 0 && mapcode[len - 1] <= 32) {
1928+
len--;
1929+
}
1930+
memcpy(u, mapcode, len);
1931+
u[len] = 0;
1932+
// re-pack into A-voweled mapcode
1933+
unpack_if_alldigits(u);
1934+
repack_if_alldigits(u, 1);
1935+
mapcode = u;
1936+
}
1937+
}
1938+
}
1939+
19191940
encode_utf16(unibuf, 1 + (int) (lastspot - unibuf), mapcode, alphabet);
19201941
}
19211942
return startbuf;
@@ -2134,29 +2155,27 @@ static int cmp_alphacode(const void *e1, const void *e2) {
21342155

21352156
static int binfindmatch(const int parentcode, const char *str) {
21362157
// build a 4-letter uppercase search term
2137-
char tmp[5];
2158+
char alphaCode[5];
21382159
const char *r = str;
21392160
int len = 0;
21402161

21412162
if (parentcode < 0) {
21422163
return -1;
21432164
}
21442165
if (parentcode > 0) {
2145-
tmp[len++] = (char) ('0' + parentcode);
2166+
alphaCode[len++] = (char) ('0' + parentcode);
21462167
}
21472168
while ((len < 4) && (*r > 32)) {
2148-
tmp[len++] = *r++;
2169+
alphaCode[len++] = *r++;
21492170
}
21502171
if (*r > 32) {
21512172
return -1;
21522173
}
2153-
tmp[len] = 0;
2154-
makeupper(tmp);
2174+
alphaCode[len] = 0;
2175+
makeupper(alphaCode);
21552176
{ // binary-search the result
21562177
const alphaRec *p;
2157-
alphaRec t;
2158-
t.alphaCode = tmp;
2159-
t.ccode = parentcode;
2178+
alphaRec t = {alphaCode, parentcode};
21602179

21612180
p = (const alphaRec *) bsearch(&t, alphaSearch, NRTERREC, sizeof(alphaRec), cmp_alphacode);
21622181
if (p) {
@@ -2605,7 +2624,7 @@ static void convertFromAbjad(char *s) {
26052624
}
26062625
repack_if_alldigits(s, 0);
26072626
if (postfix) {
2608-
int len = (int) strlen(s);
2627+
len = (int) strlen(s);
26092628
*postfix = '-';
26102629
memmove(s + len, postfix, strlen(postfix) + 1);
26112630
}

mapcodelib/mapcoder.h

100644 → 100755
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ int getTerritoryCode(
183183
* Convert a territory name to a territory code.
184184
*
185185
* Arguments:
186-
* result - String to store result
186+
* result - String to store result.
187187
* territoryCode - Territory code.
188188
* format - Pass 0 for full name, 1 for short name (state codes may be ambiguous).
189189
*
@@ -250,7 +250,7 @@ double maxErrorInMeters(int extraDigits);
250250
* Arguments:
251251
* lat - Latitude, in degrees. Range: -90..90.
252252
* lon - Longitude, in degrees. Range: -180..180.
253-
* territoryCode - Territory code (obtained from getTerritoryCode)
253+
* territoryCode - Territory code (obtained from getTerritoryCode).
254254
*
255255
* returns nonzero if coordinate is near more than one territory border
256256
*
@@ -294,7 +294,7 @@ int multipleBordersNearby(
294294
* maxlen - Size of asciibuf
295295
*
296296
* Returns:
297-
* pointer to asciibuf, which holds the result
297+
* Pointer to asciibuf, which holds the result.
298298
*/
299299
char *convertToRoman(char *asciibuf, int maxlen, const UWORD *string);
300300

@@ -310,10 +310,9 @@ const char *decodeToRoman(const UWORD *string);
310310
*
311311
* Arguments:
312312
* string - String to encode.
313-
* alphabet - Alphabet to use. Currently supported are:
314-
* 0 = roman, 2 = cyrillic, 4 = hindi, 12 = gurmukhi.
315-
* unibuf - Buffer to be filled with the result
316-
* maxlen - Size of unibuf
313+
* alphabet - Alphabet to use.
314+
* unibuf - Buffer to be filled with the result.
315+
* maxlen - Size of unibuf.
317316
*
318317
*
319318
* Returns:
@@ -362,6 +361,7 @@ const UWORD *encodeToAlphabet(const char *string, int alphabet);
362361
#define MAPCODE_LANGUAGE_BENGALI MAPCODE_ALPHABET_BENGALI
363362
#define MAPCODE_LANGUAGE_GURMUKHI MAPCODE_ALPHABET_GURMUKHI
364363
#define MAPCODE_LANGUAGE_TIBETAN MAPCODE_ALPHABET_TIBETAN
364+
#define MAPCODE_LANGUAGE_ARABIC MAPCODE_ALPHABET_ARABIC
365365

366366
#ifdef __cplusplus
367367
}

unittest/unittest.c

100644 → 100755
Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ static void alphabet_tests() {
3737
int i, j;
3838
const char *str, *expect;
3939
static const char *testpairs[] = {
40+
"00.E0", "00.E0",
4041
".123", ".123",
4142
"", "",
4243
"-", "-",
@@ -53,6 +54,7 @@ static void alphabet_tests() {
5354
"OMN 112.3EU", "OMN 112.3EU",
5455
"49.4V", "49.4V",
5556
"NLD 49.4V-xx123", "NLD 49.4V-XX123",
57+
"A12.345", "112.3AU",
5658
"xx.xx", "XX.XX",
5759
"xx.xxx", "XX.XXX",
5860
"xxx.xx", "XXX.XX",
@@ -77,22 +79,22 @@ static void alphabet_tests() {
7779
"xx.xxx-pq", "XX.XXX-PQ",
7880
"xxx.xx-123", "XXX.XX-123",
7981
"xx.xxxx-pqRS", "XX.XXXX-PQRS",
80-
"xxx.xxx-PQRSTUVW", "XXX.XXX-PQRSTUVW",
81-
"xxxx.xx-pqrstuvw", "XXXX.XX-PQRSTUVW",
82-
"xxx.xxxx-PQrsTU", "XXX.XXXX-PQRSTU",
82+
"xxx.xxx-PQRSTZVW", "XXX.XXX-PQRSTZVW",
83+
"xxxx.xx-pqrstZvw", "XXXX.XX-PQRSTZVW",
84+
"xxx.xxxx-PQrsTZ", "XXX.XXXX-PQRSTZ",
8385
"xxxx.xxx-09876543", "XXXX.XXX-09876543",
84-
"xxxx.xxxx-PQRSTUVW", "XXXX.XXXX-PQRSTUVW",
85-
"xxxxx.xxxx-PQRSTUVW", "XXXXX.XXXX-PQRSTUVW",
86-
"pq.rs-PQRSTUVW", "PQ.RS-PQRSTUVW",
87-
"bc.123-PQRSTUVW", "BC.123-PQRSTUVW",
88-
"123.xy-PQRSTUVW", "123.XY-PQRSTUVW",
89-
"12.34E0-PQRSTUVW", "12.34E0-PQRSTUVW",
90-
"VVX.xxx-PQRSTUVW", "VVX.XXX-PQRSTUVW",
91-
"x123.xx-PQRSTUVW", "X123.XX-PQRSTUVW",
92-
"xxx.xxxx-PQRSTUVW", "XXX.XXXX-PQRSTUVW",
93-
"12xx.xxx-PQRSTUVW", "12XX.XXX-PQRSTUVW",
94-
"xxxx.xx12-PQRSTUVW", "XXXX.XX12-PQRSTUVW",
95-
"99zxx.xxxx-PQRSTUVW", "99ZXX.XXXX-PQRSTUVW",
86+
"xxxx.xxxx-PQRSTZVW", "XXXX.XXXX-PQRSTZVW",
87+
"xxxxx.xxxx-PQRSTZVW", "XXXXX.XXXX-PQRSTZVW",
88+
"pq.rs-PQRSTZVW", "PQ.RS-PQRSTZVW",
89+
"bc.123-PQRSTZVW", "BC.123-PQRSTZVW",
90+
"123.xy-PQRSTZVW", "123.XY-PQRSTZVW",
91+
"12.34E0-PQRSTZVW", "12.34E0-PQRSTZVW",
92+
"VVX.xxx-PQRSTZVW", "VVX.XXX-PQRSTZVW",
93+
"x123.xx-PQRSTZVW", "X123.XX-PQRSTZVW",
94+
"xxx.xxxx-PQRSTZVW", "XXX.XXXX-PQRSTZVW",
95+
"12xx.xxx-PQRSTZVW", "12XX.XXX-PQRSTZVW",
96+
"xxxx.xx12-PQRSTZVW", "XXXX.XX12-PQRSTZVW",
97+
"99zxx.xxxx-PQRSTZVW", "99ZXX.XXXX-PQRSTZVW",
9698
NULL
9799
};
98100

@@ -110,8 +112,7 @@ static void alphabet_tests() {
110112
nrTests++;
111113
if (strcmp(dec, expect)) {
112114
nrErrors++;
113-
printf("*** ERROR *** convertToRoman(convertToAlphabet(\"%s\",%d))=\"%s\", expect=\"%s\"\n", str, i,
114-
dec, expect);
115+
printf("convertToRoman(convertToAlphabet(\"%s\",%d))=\"%s\"\n", str, i, dec);
115116
}
116117
}
117118
}

0 commit comments

Comments
 (0)