From f971f93e9242cb05ca5da9fd82a07b66e724ea38 Mon Sep 17 00:00:00 2001 From: Jonathan Marler Date: Fri, 5 Mar 2021 13:36:03 -0700 Subject: [PATCH 1/2] allow user to provide regex object storage I've added a new function re_compile_to that takes a pointer to an array of objects and characters. This allows a user to specify their own storage for their regex objects. This way a user can compile multiple regex objects and keep them around if they so choose. At the same time this also means that the user will need to know the value of MAX_REGEXP_OBJECTS and MAX_CHAR_CLASS_LEN, so I made those configurable by putting them inside their own "#ifndef"s. This way a user can customize them with -DMAX_REGEXP_OBJECTS=XXX and -DMAX_CHAR_CLASS_LEN=YYY. --- re.c | 14 ++++++++++++-- re.h | 4 +++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/re.c b/re.c index 20d1474..2a72a22 100644 --- a/re.c +++ b/re.c @@ -35,8 +35,13 @@ /* Definitions: */ +#ifndef MAX_REGEXP_OBJECTS #define MAX_REGEXP_OBJECTS 30 /* Max number of regex symbols in expression. */ +#endif + +#ifndef MAX_CHAR_CLASS_LEN #define MAX_CHAR_CLASS_LEN 40 /* Max length of character-class buffer in. */ +#endif enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH */ }; @@ -111,8 +116,13 @@ re_t re_compile(const char* pattern) /* The sizes of the two static arrays below substantiates the static RAM usage of this module. MAX_REGEXP_OBJECTS is the max number of symbols in the expression. MAX_CHAR_CLASS_LEN determines the size of buffer for chars in all char-classes in the expression. 
*/ - static regex_t re_compiled[MAX_REGEXP_OBJECTS]; - static unsigned char ccl_buf[MAX_CHAR_CLASS_LEN]; + static regex_t static_objects[MAX_REGEXP_OBJECTS]; + static unsigned char static_ccl_buf[MAX_CHAR_CLASS_LEN]; + return re_compile_to(pattern, static_objects, static_ccl_buf); +} + +re_t re_compile_to(const char* pattern, regex_t *re_compiled, unsigned char *ccl_buf) +{ int ccl_bufidx = 1; char c; /* current char in pattern */ diff --git a/re.h b/re.h index 69facc6..0cdc61f 100644 --- a/re.h +++ b/re.h @@ -46,9 +46,11 @@ extern "C"{ typedef struct regex_t* re_t; -/* Compile regex string pattern to a regex_t-array. */ +/* Compile regex string pattern to a static regex_t-array. */ re_t re_compile(const char* pattern); +/* Compile regex string pattern to the given regex_t-array. */ +re_t re_compile_to(const char* pattern, re_t objects, unsigned char *ccl_buf); /* Find matches of the compiled pattern inside text. */ int re_matchp(re_t pattern, const char* text, int* matchlength); From d32d9cdb6fd75904a1cc4190edf3d2af831496c7 Mon Sep 17 00:00:00 2001 From: Jonathan Marler Date: Fri, 5 Mar 2021 14:23:05 -0700 Subject: [PATCH 2/2] move definition of regex_t into public header If the user is to provide their own storage for regex_t, they need to know its size. --- re.c | 9 --------- re.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/re.c b/re.c index 2a72a22..760e41a 100644 --- a/re.c +++ b/re.c @@ -46,15 +46,6 @@ enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH */ }; -typedef struct regex_t -{ - unsigned char type; /* CHAR, STAR, etc. 
*/ - union - { - unsigned char ch; /* the character itself */ - unsigned char* ccl; /* OR a pointer to characters in class */ - } u; -} regex_t; diff --git a/re.h b/re.h index 0cdc61f..8dd5a4a 100644 --- a/re.h +++ b/re.h @@ -41,6 +41,15 @@ extern "C"{ #endif +typedef struct regex_t +{ + unsigned char type; /* CHAR, STAR, etc. */ + union + { + unsigned char ch; /* the character itself */ + unsigned char* ccl; /* OR a pointer to characters in class */ + } u; +} regex_t; /* Typedef'd pointer to get abstract datatype. */ typedef struct regex_t* re_t;