Fixed arabic stuff

rijnb · rijnb · commit 312844a33f82 · 2016-10-02T13:17:34.000+02:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -22,7 +22,7 @@ set(SOURCE_FILES_UNITTEST
 set(SOURCE_FILES_UTILITY
         utility/mapcode.cpp)
 
-add_executable(all ${SOURCE_FILES})
+add_executable(fullset ${SOURCE_FILES})
 
 add_executable(unittest ${SOURCE_FILES_UNITTEST})
 
diff --git a/mapcodelib/mapcoder.c b/mapcodelib/mapcoder.c
@@ -1748,6 +1748,7 @@ static int decoderEngine(decodeRec *dec) {
 // WARNING - these alphabets have NOT yet been released as standard! use at your own risk! check www.mapcode.com for details.
 static UWORD asc2lan[MAPCODE_ALPHABETS_TOTAL][36] = // A-Z equivalents for ascii characters A to Z, 0-9
         {
+                // Character:   A       B       C       D       E       F       G       H       I       J       K       L       M       N       O       P       Q       R       S       T       U       V       W       X       Y       Z       0       1       2       3       4       5       6       7       8       9
                 {0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039}, // roman
                 {0x0391, 0x0392, 0x039e, 0x0394, 0x0388, 0x0395, 0x0393, 0x0397, 0x0399, 0x03a0, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f, 0x03a1, 0x0398, 0x03a8, 0x03a3, 0x03a4, 0x0389, 0x03a6, 0x03a9, 0x03a7, 0x03a5, 0x0396, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039}, // greek
                 {0x0410, 0x0412, 0x0421, 0x0414, 0x0415, 0x0416, 0x0413, 0x041d, 0x0418, 0x041f, 0x041a, 0x041b, 0x041c, 0x0417, 0x041e, 0x0420, 0x0424, 0x042f, 0x0426, 0x0422, 0x042d, 0x0427, 0x0428, 0x0425, 0x0423, 0x0411, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039}, // cyrillic
@@ -1907,15 +1908,35 @@ UWORD *convertToAlphabet(UWORD *unibuf, int maxlength, const char *mapcode, int
                         *unibuf = 0;
                         return startbuf;
                     }
-                    *unibuf++ = *mapcode++;
+                    *unibuf++ = (UWORD) *mapcode++;
                 }
             }
         }
 
-        if (alphabet == 1 || alphabet == 3 || alphabet == 14) {
+        if (alphabet == 1 || alphabet == 3 || alphabet == 14) { // greek hebrew arabic
             mapcode = convertToAbjad(u, mapcode, USIZE);
         }
 
+        // re-pack E/U-voweled mapcodes when necessary:
+        if (alphabet == 1) { // alphabet has no letter E (greek!)
+            if (strchr(mapcode, 'E') || strchr(mapcode, 'U') ||
+                strchr(mapcode, 'e') || strchr(mapcode, 'u')) {
+                // copy trimmed mapcode into temporary buffer u
+                int len = (int) strlen(mapcode);
+                if (len < MAX_MAPCODE_RESULT_LEN) {
+                    while (len > 0 && mapcode[len - 1] > 0 && mapcode[len - 1] <= 32) {
+                        len--;
+                    }
+                    memcpy(u, mapcode, len);
+                    u[len] = 0;
+                    // re-pack into A-voweled mapcode
+                    unpack_if_alldigits(u);
+                    repack_if_alldigits(u, 1);
+                    mapcode = u;
+                }
+            }
+        }
+
         encode_utf16(unibuf, 1 + (int) (lastspot - unibuf), mapcode, alphabet);
     }
     return startbuf;
@@ -2134,29 +2155,27 @@ static int cmp_alphacode(const void *e1, const void *e2) {
 
 static int binfindmatch(const int parentcode, const char *str) {
     // build a 4-letter uppercase search term
-    char tmp[5];
+    char alphaCode[5];
     const char *r = str;
     int len = 0;
 
     if (parentcode < 0) {
         return -1;
     }
     if (parentcode > 0) {
-        tmp[len++] = (char) ('0' + parentcode);
+        alphaCode[len++] = (char) ('0' + parentcode);
     }
     while ((len < 4) && (*r > 32)) {
-        tmp[len++] = *r++;
+        alphaCode[len++] = *r++;
     }
     if (*r > 32) {
         return -1;
     }
-    tmp[len] = 0;
-    makeupper(tmp);
+    alphaCode[len] = 0;
+    makeupper(alphaCode);
     { // binary-search the result
         const alphaRec *p;
-        alphaRec t;
-        t.alphaCode = tmp;
-        t.ccode = parentcode;
+        alphaRec t = {alphaCode, parentcode};
 
         p = (const alphaRec *) bsearch(&t, alphaSearch, NRTERREC, sizeof(alphaRec), cmp_alphacode);
         if (p) {
@@ -2605,7 +2624,7 @@ static void convertFromAbjad(char *s) {
     }
     repack_if_alldigits(s, 0);
     if (postfix) {
-        int len = (int) strlen(s);
+        len = (int) strlen(s);
         *postfix = '-';
         memmove(s + len, postfix, strlen(postfix) + 1);
     }
diff --git a/mapcodelib/mapcoder.h b/mapcodelib/mapcoder.h
@@ -183,7 +183,7 @@ int getTerritoryCode(
  * Convert a territory name to a territory code.
  *
  * Arguments:
- *      result          - String to store result
+ *      result          - String to store result.
  *      territoryCode   - Territory code.
  *      format          - Pass 0 for full name, 1 for short name (state codes may be ambiguous).
  *
@@ -250,7 +250,7 @@ double maxErrorInMeters(int extraDigits);
  * Arguments:
  *      lat             - Latitude, in degrees. Range: -90..90.
  *      lon             - Longitude, in degrees. Range: -180..180.
- *      territoryCode   - Territory code (obtained from getTerritoryCode)
+ *      territoryCode   - Territory code (obtained from getTerritoryCode).
  *
  * returns nonzero if coordinate is near more than one territory border
  *
@@ -294,7 +294,7 @@ int multipleBordersNearby(
  *      maxlen   - Size of asciibuf
  *
  * Returns:
- *      pointer to asciibuf, which holds the result
+ *      Pointer to asciibuf, which holds the result.
  */
 char *convertToRoman(char *asciibuf, int maxlen, const UWORD *string);
 
@@ -310,10 +310,9 @@ const char *decodeToRoman(const UWORD *string);
  *
  * Arguments:
  *      string     - String to encode.
- *      alphabet   - Alphabet to use. Currently supported are:
- *                      0 = roman, 2 = cyrillic, 4 = hindi, 12 = gurmukhi.
- *      unibuf     - Buffer to be filled with the result
- *      maxlen     - Size of unibuf
+ *      alphabet   - Alphabet to use.
+ *      unibuf     - Buffer to be filled with the result.
+ *      maxlen     - Size of unibuf.
  *
  *
  * Returns:
@@ -362,6 +361,7 @@ const UWORD *encodeToAlphabet(const char *string, int alphabet);
 #define MAPCODE_LANGUAGE_BENGALI       MAPCODE_ALPHABET_BENGALI
 #define MAPCODE_LANGUAGE_GURMUKHI      MAPCODE_ALPHABET_GURMUKHI
 #define MAPCODE_LANGUAGE_TIBETAN       MAPCODE_ALPHABET_TIBETAN
+#define MAPCODE_LANGUAGE_ARABIC        MAPCODE_ALPHABET_ARABIC
 
 #ifdef __cplusplus
 }
diff --git a/unittest/unittest.c b/unittest/unittest.c
@@ -37,6 +37,7 @@ static void alphabet_tests() {
     int i, j;
     const char *str, *expect;
     static const char *testpairs[] = {
+            "00.E0", "00.E0",
             ".123", ".123",
             "", "",
             "-", "-",
@@ -53,6 +54,7 @@ static void alphabet_tests() {
             "OMN 112.3EU", "OMN 112.3EU",
             "49.4V", "49.4V",
             "NLD 49.4V-xx123", "NLD 49.4V-XX123",
+            "A12.345", "112.3AU",
             "xx.xx", "XX.XX",
             "xx.xxx", "XX.XXX",
             "xxx.xx", "XXX.XX",
@@ -77,22 +79,22 @@ static void alphabet_tests() {
             "xx.xxx-pq", "XX.XXX-PQ",
             "xxx.xx-123", "XXX.XX-123",
             "xx.xxxx-pqRS", "XX.XXXX-PQRS",
-            "xxx.xxx-PQRSTUVW", "XXX.XXX-PQRSTUVW",
-            "xxxx.xx-pqrstuvw", "XXXX.XX-PQRSTUVW",
-            "xxx.xxxx-PQrsTU", "XXX.XXXX-PQRSTU",
+            "xxx.xxx-PQRSTZVW", "XXX.XXX-PQRSTZVW",
+            "xxxx.xx-pqrstZvw", "XXXX.XX-PQRSTZVW",
+            "xxx.xxxx-PQrsTZ", "XXX.XXXX-PQRSTZ",
             "xxxx.xxx-09876543", "XXXX.XXX-09876543",
-            "xxxx.xxxx-PQRSTUVW", "XXXX.XXXX-PQRSTUVW",
-            "xxxxx.xxxx-PQRSTUVW", "XXXXX.XXXX-PQRSTUVW",
-            "pq.rs-PQRSTUVW", "PQ.RS-PQRSTUVW",
-            "bc.123-PQRSTUVW", "BC.123-PQRSTUVW",
-            "123.xy-PQRSTUVW", "123.XY-PQRSTUVW",
-            "12.34E0-PQRSTUVW", "12.34E0-PQRSTUVW",
-            "VVX.xxx-PQRSTUVW", "VVX.XXX-PQRSTUVW",
-            "x123.xx-PQRSTUVW", "X123.XX-PQRSTUVW",
-            "xxx.xxxx-PQRSTUVW", "XXX.XXXX-PQRSTUVW",
-            "12xx.xxx-PQRSTUVW", "12XX.XXX-PQRSTUVW",
-            "xxxx.xx12-PQRSTUVW", "XXXX.XX12-PQRSTUVW",
-            "99zxx.xxxx-PQRSTUVW", "99ZXX.XXXX-PQRSTUVW",
+            "xxxx.xxxx-PQRSTZVW", "XXXX.XXXX-PQRSTZVW",
+            "xxxxx.xxxx-PQRSTZVW", "XXXXX.XXXX-PQRSTZVW",
+            "pq.rs-PQRSTZVW", "PQ.RS-PQRSTZVW",
+            "bc.123-PQRSTZVW", "BC.123-PQRSTZVW",
+            "123.xy-PQRSTZVW", "123.XY-PQRSTZVW",
+            "12.34E0-PQRSTZVW", "12.34E0-PQRSTZVW",
+            "VVX.xxx-PQRSTZVW", "VVX.XXX-PQRSTZVW",
+            "x123.xx-PQRSTZVW", "X123.XX-PQRSTZVW",
+            "xxx.xxxx-PQRSTZVW", "XXX.XXXX-PQRSTZVW",
+            "12xx.xxx-PQRSTZVW", "12XX.XXX-PQRSTZVW",
+            "xxxx.xx12-PQRSTZVW", "XXXX.XX12-PQRSTZVW",
+            "99zxx.xxxx-PQRSTZVW", "99ZXX.XXXX-PQRSTZVW",
             NULL
     };
 
@@ -110,8 +112,7 @@ static void alphabet_tests() {
             nrTests++;
             if (strcmp(dec, expect)) {
                 nrErrors++;
-                printf("*** ERROR *** convertToRoman(convertToAlphabet(\"%s\",%d))=\"%s\", expect=\"%s\"\n", str, i,
-                       dec, expect);
+                printf("convertToRoman(convertToAlphabet(\"%s\",%d))=\"%s\"\n", str, i, dec);
             }
         }
     }

Original file line number	Diff line number	Diff line change
`@@ -183,7 +183,7 @@ int getTerritoryCode(`
`183`	`183`	`* Convert a territory name to a territory code.`
`184`	`184`	`*`
`185`	`185`	`* Arguments:`
`186`		`- * result - String to store result`
	`186`	`+ * result - String to store result.`
`187`	`187`	`* territoryCode - Territory code.`
`188`	`188`	`* format - Pass 0 for full name, 1 for short name (state codes may be ambiguous).`
`189`	`189`	`*`
`@@ -250,7 +250,7 @@ double maxErrorInMeters(int extraDigits);`
`250`	`250`	`* Arguments:`
`251`	`251`	`* lat - Latitude, in degrees. Range: -90..90.`
`252`	`252`	`* lon - Longitude, in degrees. Range: -180..180.`
`253`		`- * territoryCode - Territory code (obtained from getTerritoryCode)`
	`253`	`+ * territoryCode - Territory code (obtained from getTerritoryCode).`
`254`	`254`	`*`
`255`	`255`	`* returns nonzero if coordinate is near more than one territory border`
`256`	`256`	`*`
`@@ -294,7 +294,7 @@ int multipleBordersNearby(`
`294`	`294`	`* maxlen - Size of asciibuf`
`295`	`295`	`*`
`296`	`296`	`* Returns:`
`297`		`- * pointer to asciibuf, which holds the result`
	`297`	`+ * Pointer to asciibuf, which holds the result.`
`298`	`298`	`*/`
`299`	`299`	`char *convertToRoman(char *asciibuf, int maxlen, const UWORD *string);`
`300`	`300`
`@@ -310,10 +310,9 @@ const char *decodeToRoman(const UWORD *string);`
`310`	`310`	`*`
`311`	`311`	`* Arguments:`
`312`	`312`	`* string - String to encode.`
`313`		`- * alphabet - Alphabet to use. Currently supported are:`
`314`		`- * 0 = roman, 2 = cyrillic, 4 = hindi, 12 = gurmukhi.`
`315`		`- * unibuf - Buffer to be filled with the result`
`316`		`- * maxlen - Size of unibuf`
	`313`	`+ * alphabet - Alphabet to use.`
	`314`	`+ * unibuf - Buffer to be filled with the result.`
	`315`	`+ * maxlen - Size of unibuf.`
`317`	`316`	`*`
`318`	`317`	`*`
`319`	`318`	`* Returns:`
`@@ -362,6 +361,7 @@ const UWORD *encodeToAlphabet(const char *string, int alphabet);`
`362`	`361`	`#define MAPCODE_LANGUAGE_BENGALI MAPCODE_ALPHABET_BENGALI`
`363`	`362`	`#define MAPCODE_LANGUAGE_GURMUKHI MAPCODE_ALPHABET_GURMUKHI`
`364`	`363`	`#define MAPCODE_LANGUAGE_TIBETAN MAPCODE_ALPHABET_TIBETAN`
	`364`	`+#define MAPCODE_LANGUAGE_ARABIC MAPCODE_ALPHABET_ARABIC`
`365`	`365`
`366`	`366`	`#ifdef __cplusplus`
`367`	`367`	`}`