use flat memory layout

marler8997 · marler8997 · commit 1590e53486ad · 2021-03-06T08:37:15.000-07:00
diff --git a/re.c b/re.c
@@ -35,30 +35,31 @@
 
 /* Definitions: */
 
-#define MAX_REGEXP_OBJECTS      30    /* Max number of regex symbols in expression. */
-#define MAX_CHAR_CLASS_LEN      40    /* Max length of character-class buffer in.   */
+#define MAX_REGEXP_LEN      70    /* Max number of bytes for a regex. */
 
 
 enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH */ };
 
 typedef struct regex_t
 {
-  unsigned char  type;   /* CHAR, STAR, etc.                      */
-  union
-  {
-    unsigned char  ch;   /*      the character itself             */
-    unsigned char* ccl;  /*  OR  a pointer to characters in class */
-  } u;
+  unsigned char type;    /* CHAR, STAR, etc.                      */
+  unsigned char data_len;
+  unsigned char data[0];
 } regex_t;
 
+static re_t getnext(regex_t* pattern)
+{
+  return (re_t)(((unsigned char*)pattern) + 2 + pattern->data_len);
+}
+
 
 
 /* Private function declarations: */
 static int matchpattern(regex_t* pattern, const char* text, int* matchlength);
 static int matchcharclass(char c, const char* str);
-static int matchstar(regex_t p, regex_t* pattern, const char* text, int* matchlength);
-static int matchplus(regex_t p, regex_t* pattern, const char* text, int* matchlength);
-static int matchone(regex_t p, char c);
+static int matchstar(regex_t *p, regex_t* pattern, const char* text, int* matchlength);
+static int matchplus(regex_t *p, regex_t* pattern, const char* text, int* matchlength);
+static int matchone(regex_t* p, char c);
 static int matchdigit(char c);
 static int matchalpha(char c);
 static int matchwhitespace(char c);
@@ -80,9 +81,9 @@ int re_matchp(re_t pattern, const char* text, int* matchlength)
   *matchlength = 0;
   if (pattern != 0)
   {
-    if (pattern[0].type == BEGIN)
+    if (pattern->type == BEGIN)
     {
-      return ((matchpattern(&pattern[1], text, matchlength)) ? 0 : -1);
+      return ((matchpattern(getnext(pattern), text, matchlength)) ? 0 : -1);
     }
     else
     {
@@ -106,33 +107,37 @@ int re_matchp(re_t pattern, const char* text, int* matchlength)
   return -1;
 }
 
+static int min(int a, int b)
+{
+  return (a <= b) ? a : b;
+}
+
 re_t re_compile(const char* pattern)
 {
-  /* The sizes of the two static arrays below substantiates the static RAM usage of this module.
-     MAX_REGEXP_OBJECTS is the max number of symbols in the expression.
-     MAX_CHAR_CLASS_LEN determines the size of buffer for chars in all char-classes in the expression. */
-  static regex_t re_compiled[MAX_REGEXP_OBJECTS];
-  static unsigned char ccl_buf[MAX_CHAR_CLASS_LEN];
-  int ccl_bufidx = 1;
+  /* The size of this static array substantiates the static RAM usage of this module.
+     MAX_REGEXP_LEN is the max number number of bytes in the expression. */
+  static unsigned char re_data[MAX_REGEXP_LEN];
 
   char c;     /* current char in pattern   */
   int i = 0;  /* index into pattern        */
-  int j = 0;  /* index into re_compiled    */
+  int j = 0;  /* index into re_data    */
 
-  while (pattern[i] != '\0' && (j+1 < MAX_REGEXP_OBJECTS))
+  while (pattern[i] != '\0' && (j+3 < MAX_REGEXP_LEN))
   {
     c = pattern[i];
+    regex_t *re_compiled = (regex_t*)(re_data+j);
+    re_compiled->data_len = 0;
 
     switch (c)
     {
       /* Meta-characters: */
-      case '^': {    re_compiled[j].type = BEGIN;           } break;
-      case '$': {    re_compiled[j].type = END;             } break;
-      case '.': {    re_compiled[j].type = DOT;             } break;
-      case '*': {    re_compiled[j].type = STAR;            } break;
-      case '+': {    re_compiled[j].type = PLUS;            } break;
-      case '?': {    re_compiled[j].type = QUESTIONMARK;    } break;
-/*    case '|': {    re_compiled[j].type = BRANCH;          } break; <-- not working properly */
+      case '^': {    re_compiled->type = BEGIN;           } break;
+      case '$': {    re_compiled->type = END;             } break;
+      case '.': {    re_compiled->type = DOT;             } break;
+      case '*': {    re_compiled->type = STAR;            } break;
+      case '+': {    re_compiled->type = PLUS;            } break;
+      case '?': {    re_compiled->type = QUESTIONMARK;    } break;
+/*    case '|': {    re_compiled->type = BRANCH;          } break; <-- not working properly */
 
       /* Escaped character-classes (\s \w ...): */
       case '\\':
@@ -145,41 +150,42 @@ re_t re_compile(const char* pattern)
           switch (pattern[i])
           {
             /* Meta-character: */
-            case 'd': {    re_compiled[j].type = DIGIT;            } break;
-            case 'D': {    re_compiled[j].type = NOT_DIGIT;        } break;
-            case 'w': {    re_compiled[j].type = ALPHA;            } break;
-            case 'W': {    re_compiled[j].type = NOT_ALPHA;        } break;
-            case 's': {    re_compiled[j].type = WHITESPACE;       } break;
-            case 'S': {    re_compiled[j].type = NOT_WHITESPACE;   } break;
+            case 'd': {    re_compiled->type = DIGIT;            } break;
+            case 'D': {    re_compiled->type = NOT_DIGIT;        } break;
+            case 'w': {    re_compiled->type = ALPHA;            } break;
+            case 'W': {    re_compiled->type = NOT_ALPHA;        } break;
+            case 's': {    re_compiled->type = WHITESPACE;       } break;
+            case 'S': {    re_compiled->type = NOT_WHITESPACE;   } break;
 
             /* Escaped character, e.g. '.' or '$' */
             default:
             {
-              re_compiled[j].type = CHAR;
-              re_compiled[j].u.ch = pattern[i];
+              re_compiled->type = CHAR;
+              re_compiled->data_len = 1;
+              re_compiled->data[0] = pattern[i];
             } break;
           }
         }
         /* '\\' as last char in pattern -> invalid regular expression. */
 /*
         else
         {
-          re_compiled[j].type = CHAR;
-          re_compiled[j].ch = pattern[i];
+          re_compiled->type = CHAR;
+          re_compiled->data_len = 1;
+          re_compiled->data[0] = pattern[i];
         }
 */
       } break;
 
       /* Character class: */
       case '[':
       {
-        /* Remember where the char-buffer starts. */
-        int buf_begin = ccl_bufidx;
+        int char_limit = min(0xff, MAX_REGEXP_LEN - j - 4); // 4 for this object and UNUSED at the minimum
 
         /* Look-ahead to determine if negated */
         if (pattern[i+1] == '^')
         {
-          re_compiled[j].type = INV_CHAR_CLASS;
+          re_compiled->type = INV_CHAR_CLASS;
           i += 1; /* Increment i to avoid including '^' in the char-buffer */
           if (pattern[i+1] == 0) /* incomplete pattern, missing non-zero char after '^' */
           {
@@ -188,7 +194,7 @@ re_t re_compile(const char* pattern)
         }
         else
         {
-          re_compiled[j].type = CHAR_CLASS;
+          re_compiled->type = CHAR_CLASS;
         }
 
         /* Copy characters inside [..] to buffer */
@@ -197,7 +203,7 @@ re_t re_compile(const char* pattern)
         {
           if (pattern[i] == '\\')
           {
-            if (ccl_bufidx >= MAX_CHAR_CLASS_LEN - 1)
+            if (re_compiled->data_len >= char_limit)
             {
               //fputs("exceeded internal buffer!\n", stderr);
               return 0;
@@ -206,31 +212,32 @@ re_t re_compile(const char* pattern)
             {
               return 0;
             }
-            ccl_buf[ccl_bufidx++] = pattern[i++];
+            re_compiled->data[re_compiled->data_len++] = pattern[i++];
           }
-          else if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)
+          // TODO: I think this "else if" is a bug, should just be "if"
+          else if (re_compiled->data_len >= char_limit)
           {
               //fputs("exceeded internal buffer!\n", stderr);
               return 0;
           }
-          ccl_buf[ccl_bufidx++] = pattern[i];
+          re_compiled->data[re_compiled->data_len++] = pattern[i];
         }
-        if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)
+        if (re_compiled->data_len >= char_limit)
         {
             /* Catches cases such as [00000000000000000000000000000000000000][ */
             //fputs("exceeded internal buffer!\n", stderr);
             return 0;
         }
         /* Null-terminate string end */
-        ccl_buf[ccl_bufidx++] = 0;
-        re_compiled[j].u.ccl = &ccl_buf[buf_begin];
+        re_compiled->data[re_compiled->data_len++] = 0;
       } break;
 
       /* Other characters: */
       default:
       {
-        re_compiled[j].type = CHAR;
-        re_compiled[j].u.ch = c;
+        re_compiled->type = CHAR;
+        re_compiled->data_len = 1;
+        re_compiled->data[0] = c;
       } break;
     }
     /* no buffer-out-of-bounds access on invalid patterns - see https://github.com/kokke/tiny-regex-c/commit/1a279e04014b70b0695fba559a7c05d55e6ee90b */
@@ -240,35 +247,39 @@ re_t re_compile(const char* pattern)
     }
 
     i += 1;
-    j += 1;
+    j += 2 + re_compiled->data_len;
+  }
+  if (j + 1 >= MAX_REGEXP_LEN) {
+      //fputs("exceeded internal buffer!\n", stderr);
+       return 0;
   }
   /* 'UNUSED' is a sentinel used to indicate end-of-pattern */
-  re_compiled[j].type = UNUSED;
+  re_data[j] = UNUSED;
+  re_data[j+1] = 0;
 
-  return (re_t) re_compiled;
+  return (re_t) re_data;
 }
 
 void re_print(regex_t* pattern)
 {
   const char* types[] = { "UNUSED", "DOT", "BEGIN", "END", "QUESTIONMARK", "STAR", "PLUS", "CHAR", "CHAR_CLASS", "INV_CHAR_CLASS", "DIGIT", "NOT_DIGIT", "ALPHA", "NOT_ALPHA", "WHITESPACE", "NOT_WHITESPACE", "BRANCH" };
 
-  int i;
   int j;
   char c;
-  for (i = 0; i < MAX_REGEXP_OBJECTS; ++i)
+  for (;; pattern = getnext(pattern))
   {
-    if (pattern[i].type == UNUSED)
+    if (pattern->type == UNUSED)
     {
       break;
     }
 
-    printf("type: %s", types[pattern[i].type]);
-    if (pattern[i].type == CHAR_CLASS || pattern[i].type == INV_CHAR_CLASS)
+    printf("type: %s", types[pattern->type]);
+    if (pattern->type == CHAR_CLASS || pattern->type == INV_CHAR_CLASS)
     {
       printf(" [");
-      for (j = 0; j < MAX_CHAR_CLASS_LEN; ++j)
+      for (j = 0; j < pattern->data_len; ++j)
       {
-        c = pattern[i].u.ccl[j];
+        c = pattern->data[j];
         if ((c == '\0') || (c == ']'))
         {
           break;
@@ -277,9 +288,9 @@ void re_print(regex_t* pattern)
       }
       printf("]");
     }
-    else if (pattern[i].type == CHAR)
+    else if (pattern->type == CHAR)
     {
-      printf(" '%c'", pattern[i].u.ch);
+      printf(" '%c'", pattern->data[0]);
     }
     printf("\n");
   }
@@ -380,24 +391,25 @@ static int matchcharclass(char c, const char* str)
   return 0;
 }
 
-static int matchone(regex_t p, char c)
+static int matchone(regex_t* p, char c)
 {
-  switch (p.type)
+  switch (p->type)
   {
     case DOT:            return matchdot(c);
-    case CHAR_CLASS:     return  matchcharclass(c, (const char*)p.u.ccl);
-    case INV_CHAR_CLASS: return !matchcharclass(c, (const char*)p.u.ccl);
+    case CHAR_CLASS:     return  matchcharclass(c, (const char*)p->data);
+    case INV_CHAR_CLASS: return !matchcharclass(c, (const char*)p->data);
     case DIGIT:          return  matchdigit(c);
     case NOT_DIGIT:      return !matchdigit(c);
     case ALPHA:          return  matchalphanum(c);
     case NOT_ALPHA:      return !matchalphanum(c);
     case WHITESPACE:     return  matchwhitespace(c);
     case NOT_WHITESPACE: return !matchwhitespace(c);
-    default:             return  (p.u.ch == c);
+    case BEGIN:          return 0;
+    default:             return  (p->data[0] == c);
   }
 }
 
-static int matchstar(regex_t p, regex_t* pattern, const char* text, int* matchlength)
+static int matchstar(regex_t* p, regex_t* pattern, const char* text, int* matchlength)
 {
   int prelen = *matchlength;
   const char* prepoint = text;
@@ -417,7 +429,7 @@ static int matchstar(regex_t p, regex_t* pattern, const char* text, int* matchle
   return 0;
 }
 
-static int matchplus(regex_t p, regex_t* pattern, const char* text, int* matchlength)
+static int matchplus(regex_t* p, regex_t* pattern, const char* text, int* matchlength)
 {
   const char* prepoint = text;
   while ((text[0] != '\0') && matchone(p, *text))
@@ -435,10 +447,8 @@ static int matchplus(regex_t p, regex_t* pattern, const char* text, int* matchle
   return 0;
 }
 
-static int matchquestion(regex_t p, regex_t* pattern, const char* text, int* matchlength)
+static int matchquestion(regex_t *p, regex_t* pattern, const char* text, int* matchlength)
 {
-  if (p.type == UNUSED)
-    return 1;
   if (matchpattern(pattern, text, matchlength))
       return 1;
   if (*text && matchone(p, *text++))
@@ -493,33 +503,42 @@ static int matchpattern(regex_t* pattern, const char* text, int *matchlength)
 static int matchpattern(regex_t* pattern, const char* text, int* matchlength)
 {
   int pre = *matchlength;
-  do
+  while (1)
   {
-    if ((pattern[0].type == UNUSED) || (pattern[1].type == QUESTIONMARK))
+    if (pattern->type == UNUSED)
     {
-      return matchquestion(pattern[0], &pattern[2], text, matchlength);
+      return 1;
     }
-    else if (pattern[1].type == STAR)
+    regex_t* next_pattern = getnext(pattern);
+    if (next_pattern->type == QUESTIONMARK)
     {
-      return matchstar(pattern[0], &pattern[2], text, matchlength);
+      return matchquestion(pattern, getnext(next_pattern), text, matchlength);
     }
-    else if (pattern[1].type == PLUS)
+    else if (next_pattern->type == STAR)
     {
-      return matchplus(pattern[0], &pattern[2], text, matchlength);
+      return matchstar(pattern, getnext(next_pattern), text, matchlength);
     }
-    else if ((pattern[0].type == END) && pattern[1].type == UNUSED)
+    else if (next_pattern->type == PLUS)
+    {
+      return matchplus(pattern, getnext(next_pattern), text, matchlength);
+    }
+    else if ((pattern->type == END) && next_pattern->type == UNUSED)
     {
       return (text[0] == '\0');
     }
 /*  Branching is not working properly
-    else if (pattern[1].type == BRANCH)
+    else if (pattern->type == BRANCH)
     {
-      return (matchpattern(pattern, text) || matchpattern(&pattern[2], text));
+      return (matchpattern(pattern, text) || matchpattern(getnext(next_pattern), text));
     }
 */
   (*matchlength)++;
+    if (text[0] == '\0')
+      break;
+    if (!matchone(pattern, *text++))
+      break;
+    pattern = next_pattern;
   }
-  while ((text[0] != '\0') && matchone(*pattern++, *text++));
 
   *matchlength = pre;
   return 0;

Original file line number	Diff line number	Diff line change
`@@ -35,30 +35,31 @@`
`35`	`35`
`36`	`36`	`/* Definitions: */`
`37`	`37`
`38`		`-#define MAX_REGEXP_OBJECTS 30 /* Max number of regex symbols in expression. */`
`39`		`-#define MAX_CHAR_CLASS_LEN 40 /* Max length of character-class buffer in. */`
	`38`	`+#define MAX_REGEXP_LEN 70 /* Max number of bytes for a regex. */`
`40`	`39`
`41`	`40`
`42`	`41`	`enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH */ };`
`43`	`42`
`44`	`43`	`typedef struct regex_t`
`45`	`44`	`{`
`46`		`- unsigned char type; /* CHAR, STAR, etc. */`
`47`		`- union`
`48`		`- {`
`49`		`- unsigned char ch; /* the character itself */`
`50`		`- unsigned char* ccl; /* OR a pointer to characters in class */`
`51`		`- } u;`
	`45`	`+ unsigned char type; /* CHAR, STAR, etc. */`
	`46`	`+ unsigned char data_len;`
	`47`	`+ unsigned char data[0];`
`52`	`48`	`} regex_t;`
`53`	`49`
	`50`	`+static re_t getnext(regex_t* pattern)`
	`51`	`+{`
	`52`	`+ return (re_t)(((unsigned char*)pattern) + 2 + pattern->data_len);`
	`53`	`+}`
	`54`	`+`
`54`	`55`
`55`	`56`
`56`	`57`	`/* Private function declarations: */`
`57`	`58`	`static int matchpattern(regex_t* pattern, const char* text, int* matchlength);`
`58`	`59`	`static int matchcharclass(char c, const char* str);`
`59`		`-static int matchstar(regex_t p, regex_t* pattern, const char* text, int* matchlength);`
`60`		`-static int matchplus(regex_t p, regex_t* pattern, const char* text, int* matchlength);`
`61`		`-static int matchone(regex_t p, char c);`
	`60`	`+static int matchstar(regex_t *p, regex_t* pattern, const char* text, int* matchlength);`
	`61`	`+static int matchplus(regex_t *p, regex_t* pattern, const char* text, int* matchlength);`
	`62`	`+static int matchone(regex_t* p, char c);`
`62`	`63`	`static int matchdigit(char c);`
`63`	`64`	`static int matchalpha(char c);`
`64`	`65`	`static int matchwhitespace(char c);`
`@@ -80,9 +81,9 @@ int re_matchp(re_t pattern, const char* text, int* matchlength)`
`80`	`81`	`*matchlength = 0;`
`81`	`82`	`if (pattern != 0)`
`82`	`83`	`{`
`83`		`- if (pattern[0].type == BEGIN)`
	`84`	`+ if (pattern->type == BEGIN)`
`84`	`85`	`{`
`85`		`- return ((matchpattern(&pattern[1], text, matchlength)) ? 0 : -1);`
	`86`	`+ return ((matchpattern(getnext(pattern), text, matchlength)) ? 0 : -1);`
`86`	`87`	`}`
`87`	`88`	`else`
`88`	`89`	`{`
`@@ -106,33 +107,37 @@ int re_matchp(re_t pattern, const char* text, int* matchlength)`
`106`	`107`	`return -1;`
`107`	`108`	`}`
`108`	`109`
	`110`	`+static int min(int a, int b)`
	`111`	`+{`
	`112`	`+ return (a <= b) ? a : b;`
	`113`	`+}`
	`114`	`+`
`109`	`115`	`re_t re_compile(const char* pattern)`
`110`	`116`	`{`
`111`		`- /* The sizes of the two static arrays below substantiates the static RAM usage of this module.`
`112`		`- MAX_REGEXP_OBJECTS is the max number of symbols in the expression.`
`113`		`- MAX_CHAR_CLASS_LEN determines the size of buffer for chars in all char-classes in the expression. */`
`114`		`- static regex_t re_compiled[MAX_REGEXP_OBJECTS];`
`115`		`- static unsigned char ccl_buf[MAX_CHAR_CLASS_LEN];`
`116`		`- int ccl_bufidx = 1;`
	`117`	`+ /* The size of this static array substantiates the static RAM usage of this module.`
	`118`	`+ MAX_REGEXP_LEN is the max number number of bytes in the expression. */`
	`119`	`+ static unsigned char re_data[MAX_REGEXP_LEN];`
`117`	`120`
`118`	`121`	`char c; /* current char in pattern */`
`119`	`122`	`int i = 0; /* index into pattern */`
`120`		`- int j = 0; /* index into re_compiled */`
	`123`	`+ int j = 0; /* index into re_data */`
`121`	`124`
`122`		`- while (pattern[i] != '\0' && (j+1 < MAX_REGEXP_OBJECTS))`
	`125`	`+ while (pattern[i] != '\0' && (j+3 < MAX_REGEXP_LEN))`
`123`	`126`	`{`
`124`	`127`	`c = pattern[i];`
	`128`	`+ regex_t *re_compiled = (regex_t*)(re_data+j);`
	`129`	`+ re_compiled->data_len = 0;`
`125`	`130`
`126`	`131`	`switch (c)`
`127`	`132`	`{`
`128`	`133`	`/* Meta-characters: */`
`129`		`- case '^': { re_compiled[j].type = BEGIN; } break;`
`130`		`- case '$': { re_compiled[j].type = END; } break;`
`131`		`- case '.': { re_compiled[j].type = DOT; } break;`
`132`		`- case '*': { re_compiled[j].type = STAR; } break;`
`133`		`- case '+': { re_compiled[j].type = PLUS; } break;`
`134`		`- case '?': { re_compiled[j].type = QUESTIONMARK; } break;`
`135`		`-/* case '\|': { re_compiled[j].type = BRANCH; } break; <-- not working properly */`
	`134`	`+ case '^': { re_compiled->type = BEGIN; } break;`
	`135`	`+ case '$': { re_compiled->type = END; } break;`
	`136`	`+ case '.': { re_compiled->type = DOT; } break;`
	`137`	`+ case '*': { re_compiled->type = STAR; } break;`
	`138`	`+ case '+': { re_compiled->type = PLUS; } break;`
	`139`	`+ case '?': { re_compiled->type = QUESTIONMARK; } break;`
	`140`	`+/* case '\|': { re_compiled->type = BRANCH; } break; <-- not working properly */`
`136`	`141`
`137`	`142`	`/* Escaped character-classes (\s \w ...): */`
`138`	`143`	`case '\\':`
`@@ -145,41 +150,42 @@ re_t re_compile(const char* pattern)`
`145`	`150`	`switch (pattern[i])`
`146`	`151`	`{`
`147`	`152`	`/* Meta-character: */`
`148`		`- case 'd': { re_compiled[j].type = DIGIT; } break;`
`149`		`- case 'D': { re_compiled[j].type = NOT_DIGIT; } break;`
`150`		`- case 'w': { re_compiled[j].type = ALPHA; } break;`
`151`		`- case 'W': { re_compiled[j].type = NOT_ALPHA; } break;`
`152`		`- case 's': { re_compiled[j].type = WHITESPACE; } break;`
`153`		`- case 'S': { re_compiled[j].type = NOT_WHITESPACE; } break;`
	`153`	`+ case 'd': { re_compiled->type = DIGIT; } break;`
	`154`	`+ case 'D': { re_compiled->type = NOT_DIGIT; } break;`
	`155`	`+ case 'w': { re_compiled->type = ALPHA; } break;`
	`156`	`+ case 'W': { re_compiled->type = NOT_ALPHA; } break;`
	`157`	`+ case 's': { re_compiled->type = WHITESPACE; } break;`
	`158`	`+ case 'S': { re_compiled->type = NOT_WHITESPACE; } break;`
`154`	`159`
`155`	`160`	`/* Escaped character, e.g. '.' or '$' */`
`156`	`161`	`default:`
`157`	`162`	`{`
`158`		`- re_compiled[j].type = CHAR;`
`159`		`- re_compiled[j].u.ch = pattern[i];`
	`163`	`+ re_compiled->type = CHAR;`
	`164`	`+ re_compiled->data_len = 1;`
	`165`	`+ re_compiled->data[0] = pattern[i];`
`160`	`166`	`} break;`
`161`	`167`	`}`
`162`	`168`	`}`
`163`	`169`	`/* '\\' as last char in pattern -> invalid regular expression. */`
`164`	`170`	`/*`
`165`	`171`	`else`
`166`	`172`	`{`
`167`		`- re_compiled[j].type = CHAR;`
`168`		`- re_compiled[j].ch = pattern[i];`
	`173`	`+ re_compiled->type = CHAR;`
	`174`	`+ re_compiled->data_len = 1;`
	`175`	`+ re_compiled->data[0] = pattern[i];`
`169`	`176`	`}`
`170`	`177`	`*/`
`171`	`178`	`} break;`
`172`	`179`
`173`	`180`	`/* Character class: */`
`174`	`181`	`case '[':`
`175`	`182`	`{`
`176`		`- /* Remember where the char-buffer starts. */`
`177`		`- int buf_begin = ccl_bufidx;`
	`183`	`+ int char_limit = min(0xff, MAX_REGEXP_LEN - j - 4); // 4 for this object and UNUSED at the minimum`
`178`	`184`
`179`	`185`	`/* Look-ahead to determine if negated */`
`180`	`186`	`if (pattern[i+1] == '^')`
`181`	`187`	`{`
`182`		`- re_compiled[j].type = INV_CHAR_CLASS;`
	`188`	`+ re_compiled->type = INV_CHAR_CLASS;`
`183`	`189`	`i += 1; /* Increment i to avoid including '^' in the char-buffer */`
`184`	`190`	`if (pattern[i+1] == 0) /* incomplete pattern, missing non-zero char after '^' */`
`185`	`191`	`{`
`@@ -188,7 +194,7 @@ re_t re_compile(const char* pattern)`
`188`	`194`	`}`
`189`	`195`	`else`
`190`	`196`	`{`
`191`		`- re_compiled[j].type = CHAR_CLASS;`
	`197`	`+ re_compiled->type = CHAR_CLASS;`
`192`	`198`	`}`
`193`	`199`
`194`	`200`	`/* Copy characters inside [..] to buffer */`
`@@ -197,7 +203,7 @@ re_t re_compile(const char* pattern)`
`197`	`203`	`{`
`198`	`204`	`if (pattern[i] == '\\')`
`199`	`205`	`{`
`200`		`- if (ccl_bufidx >= MAX_CHAR_CLASS_LEN - 1)`
	`206`	`+ if (re_compiled->data_len >= char_limit)`
`201`	`207`	`{`
`202`	`208`	`//fputs("exceeded internal buffer!\n", stderr);`
`203`	`209`	`return 0;`
`@@ -206,31 +212,32 @@ re_t re_compile(const char* pattern)`
`206`	`212`	`{`
`207`	`213`	`return 0;`
`208`	`214`	`}`
`209`		`- ccl_buf[ccl_bufidx++] = pattern[i++];`
	`215`	`+ re_compiled->data[re_compiled->data_len++] = pattern[i++];`
`210`	`216`	`}`
`211`		`- else if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)`
	`217`	`+ // TODO: I think this "else if" is a bug, should just be "if"`
	`218`	`+ else if (re_compiled->data_len >= char_limit)`
`212`	`219`	`{`
`213`	`220`	`//fputs("exceeded internal buffer!\n", stderr);`
`214`	`221`	`return 0;`
`215`	`222`	`}`
`216`		`- ccl_buf[ccl_bufidx++] = pattern[i];`
	`223`	`+ re_compiled->data[re_compiled->data_len++] = pattern[i];`
`217`	`224`	`}`
`218`		`- if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)`
	`225`	`+ if (re_compiled->data_len >= char_limit)`
`219`	`226`	`{`
`220`	`227`	`/* Catches cases such as [00000000000000000000000000000000000000][ */`
`221`	`228`	`//fputs("exceeded internal buffer!\n", stderr);`
`222`	`229`	`return 0;`
`223`	`230`	`}`
`224`	`231`	`/* Null-terminate string end */`
`225`		`- ccl_buf[ccl_bufidx++] = 0;`
`226`		`- re_compiled[j].u.ccl = &ccl_buf[buf_begin];`
	`232`	`+ re_compiled->data[re_compiled->data_len++] = 0;`
`227`	`233`	`} break;`
`228`	`234`
`229`	`235`	`/* Other characters: */`
`230`	`236`	`default:`
`231`	`237`	`{`
`232`		`- re_compiled[j].type = CHAR;`
`233`		`- re_compiled[j].u.ch = c;`
	`238`	`+ re_compiled->type = CHAR;`
	`239`	`+ re_compiled->data_len = 1;`
	`240`	`+ re_compiled->data[0] = c;`
`234`	`241`	`} break;`
`235`	`242`	`}`
`236`	`243`	`/* no buffer-out-of-bounds access on invalid patterns - see https://github.com/kokke/tiny-regex-c/commit/1a279e04014b70b0695fba559a7c05d55e6ee90b */`
`@@ -240,35 +247,39 @@ re_t re_compile(const char* pattern)`
`240`	`247`	`}`
`241`	`248`
`242`	`249`	`i += 1;`
`243`		`- j += 1;`
	`250`	`+ j += 2 + re_compiled->data_len;`
	`251`	`+ }`
	`252`	`+ if (j + 1 >= MAX_REGEXP_LEN) {`
	`253`	`+ //fputs("exceeded internal buffer!\n", stderr);`
	`254`	`+ return 0;`
`244`	`255`	`}`
`245`	`256`	`/* 'UNUSED' is a sentinel used to indicate end-of-pattern */`
`246`		`- re_compiled[j].type = UNUSED;`
	`257`	`+ re_data[j] = UNUSED;`
	`258`	`+ re_data[j+1] = 0;`
`247`	`259`
`248`		`- return (re_t) re_compiled;`
	`260`	`+ return (re_t) re_data;`
`249`	`261`	`}`
`250`	`262`
`251`	`263`	`void re_print(regex_t* pattern)`
`252`	`264`	`{`
`253`	`265`	`const char* types[] = { "UNUSED", "DOT", "BEGIN", "END", "QUESTIONMARK", "STAR", "PLUS", "CHAR", "CHAR_CLASS", "INV_CHAR_CLASS", "DIGIT", "NOT_DIGIT", "ALPHA", "NOT_ALPHA", "WHITESPACE", "NOT_WHITESPACE", "BRANCH" };`
`254`	`266`
`255`		`- int i;`
`256`	`267`	`int j;`
`257`	`268`	`char c;`
`258`		`- for (i = 0; i < MAX_REGEXP_OBJECTS; ++i)`
	`269`	`+ for (;; pattern = getnext(pattern))`
`259`	`270`	`{`
`260`		`- if (pattern[i].type == UNUSED)`
	`271`	`+ if (pattern->type == UNUSED)`
`261`	`272`	`{`
`262`	`273`	`break;`
`263`	`274`	`}`
`264`	`275`
`265`		`- printf("type: %s", types[pattern[i].type]);`
`266`		`- if (pattern[i].type == CHAR_CLASS \|\| pattern[i].type == INV_CHAR_CLASS)`
	`276`	`+ printf("type: %s", types[pattern->type]);`
	`277`	`+ if (pattern->type == CHAR_CLASS \|\| pattern->type == INV_CHAR_CLASS)`
`267`	`278`	`{`
`268`	`279`	`printf(" [");`
`269`		`- for (j = 0; j < MAX_CHAR_CLASS_LEN; ++j)`
	`280`	`+ for (j = 0; j < pattern->data_len; ++j)`
`270`	`281`	`{`
`271`		`- c = pattern[i].u.ccl[j];`
	`282`	`+ c = pattern->data[j];`
`272`	`283`	`if ((c == '\0') \|\| (c == ']'))`
`273`	`284`	`{`
`274`	`285`	`break;`
`@@ -277,9 +288,9 @@ void re_print(regex_t* pattern)`
`277`	`288`	`}`
`278`	`289`	`printf("]");`
`279`	`290`	`}`
`280`		`- else if (pattern[i].type == CHAR)`
	`291`	`+ else if (pattern->type == CHAR)`
`281`	`292`	`{`
`282`		`- printf(" '%c'", pattern[i].u.ch);`
	`293`	`+ printf(" '%c'", pattern->data[0]);`
`283`	`294`	`}`
`284`	`295`	`printf("\n");`
`285`	`296`	`}`
`@@ -380,24 +391,25 @@ static int matchcharclass(char c, const char* str)`
`380`	`391`	`return 0;`
`381`	`392`	`}`
`382`	`393`
`383`		`-static int matchone(regex_t p, char c)`
	`394`	`+static int matchone(regex_t* p, char c)`
`384`	`395`	`{`
`385`		`- switch (p.type)`
	`396`	`+ switch (p->type)`
`386`	`397`	`{`
`387`	`398`	`case DOT: return matchdot(c);`
`388`		`- case CHAR_CLASS: return matchcharclass(c, (const char*)p.u.ccl);`
`389`		`- case INV_CHAR_CLASS: return !matchcharclass(c, (const char*)p.u.ccl);`
	`399`	`+ case CHAR_CLASS: return matchcharclass(c, (const char*)p->data);`
	`400`	`+ case INV_CHAR_CLASS: return !matchcharclass(c, (const char*)p->data);`
`390`	`401`	`case DIGIT: return matchdigit(c);`
`391`	`402`	`case NOT_DIGIT: return !matchdigit(c);`
`392`	`403`	`case ALPHA: return matchalphanum(c);`
`393`	`404`	`case NOT_ALPHA: return !matchalphanum(c);`
`394`	`405`	`case WHITESPACE: return matchwhitespace(c);`
`395`	`406`	`case NOT_WHITESPACE: return !matchwhitespace(c);`
`396`		`- default: return (p.u.ch == c);`
	`407`	`+ case BEGIN: return 0;`
	`408`	`+ default: return (p->data[0] == c);`
`397`	`409`	`}`
`398`	`410`	`}`
`399`	`411`
`400`		`-static int matchstar(regex_t p, regex_t* pattern, const char* text, int* matchlength)`
	`412`	`+static int matchstar(regex_t* p, regex_t* pattern, const char* text, int* matchlength)`
`401`	`413`	`{`
`402`	`414`	`int prelen = *matchlength;`
`403`	`415`	`const char* prepoint = text;`
`@@ -417,7 +429,7 @@ static int matchstar(regex_t p, regex_t* pattern, const char* text, int* matchle`
`417`	`429`	`return 0;`
`418`	`430`	`}`
`419`	`431`
`420`		`-static int matchplus(regex_t p, regex_t* pattern, const char* text, int* matchlength)`
	`432`	`+static int matchplus(regex_t* p, regex_t* pattern, const char* text, int* matchlength)`
`421`	`433`	`{`
`422`	`434`	`const char* prepoint = text;`
`423`	`435`	`while ((text[0] != '\0') && matchone(p, *text))`
`@@ -435,10 +447,8 @@ static int matchplus(regex_t p, regex_t* pattern, const char* text, int* matchle`
`435`	`447`	`return 0;`
`436`	`448`	`}`
`437`	`449`
`438`		`-static int matchquestion(regex_t p, regex_t* pattern, const char* text, int* matchlength)`
	`450`	`+static int matchquestion(regex_t *p, regex_t* pattern, const char* text, int* matchlength)`
`439`	`451`	`{`
`440`		`- if (p.type == UNUSED)`
`441`		`- return 1;`
`442`	`452`	`if (matchpattern(pattern, text, matchlength))`
`443`	`453`	`return 1;`
`444`	`454`	`if (*text && matchone(p, *text++))`
`@@ -493,33 +503,42 @@ static int matchpattern(regex_t* pattern, const char* text, int *matchlength)`
`493`	`503`	`static int matchpattern(regex_t* pattern, const char* text, int* matchlength)`
`494`	`504`	`{`
`495`	`505`	`int pre = *matchlength;`
`496`		`- do`
	`506`	`+ while (1)`
`497`	`507`	`{`
`498`		`- if ((pattern[0].type == UNUSED) \|\| (pattern[1].type == QUESTIONMARK))`
	`508`	`+ if (pattern->type == UNUSED)`
`499`	`509`	`{`
`500`		`- return matchquestion(pattern[0], &pattern[2], text, matchlength);`
	`510`	`+ return 1;`
`501`	`511`	`}`
`502`		`- else if (pattern[1].type == STAR)`
	`512`	`+ regex_t* next_pattern = getnext(pattern);`
	`513`	`+ if (next_pattern->type == QUESTIONMARK)`
`503`	`514`	`{`
`504`		`- return matchstar(pattern[0], &pattern[2], text, matchlength);`
	`515`	`+ return matchquestion(pattern, getnext(next_pattern), text, matchlength);`
`505`	`516`	`}`
`506`		`- else if (pattern[1].type == PLUS)`
	`517`	`+ else if (next_pattern->type == STAR)`
`507`	`518`	`{`
`508`		`- return matchplus(pattern[0], &pattern[2], text, matchlength);`
	`519`	`+ return matchstar(pattern, getnext(next_pattern), text, matchlength);`
`509`	`520`	`}`
`510`		`- else if ((pattern[0].type == END) && pattern[1].type == UNUSED)`
	`521`	`+ else if (next_pattern->type == PLUS)`
	`522`	`+ {`
	`523`	`+ return matchplus(pattern, getnext(next_pattern), text, matchlength);`
	`524`	`+ }`
	`525`	`+ else if ((pattern->type == END) && next_pattern->type == UNUSED)`
`511`	`526`	`{`
`512`	`527`	`return (text[0] == '\0');`
`513`	`528`	`}`
`514`	`529`	`/* Branching is not working properly`
`515`		`- else if (pattern[1].type == BRANCH)`
	`530`	`+ else if (pattern->type == BRANCH)`
`516`	`531`	`{`
`517`		`- return (matchpattern(pattern, text) \|\| matchpattern(&pattern[2], text));`
	`532`	`+ return (matchpattern(pattern, text) \|\| matchpattern(getnext(next_pattern), text));`
`518`	`533`	`}`
`519`	`534`	`*/`
`520`	`535`	`(*matchlength)++;`
	`536`	`+ if (text[0] == '\0')`
	`537`	`+ break;`
	`538`	`+ if (!matchone(pattern, *text++))`
	`539`	`+ break;`
	`540`	`+ pattern = next_pattern;`
`521`	`541`	`}`
`522`		`- while ((text[0] != '\0') && matchone(*pattern++, *text++));`
`523`	`542`
`524`	`543`	`*matchlength = pre;`
`525`	`544`	`return 0;`