Skip to content

Enable storing multiple compiled patterns at once #26

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 17 additions & 22 deletions re.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,10 @@

/* Definitions: */

#define MAX_REGEXP_OBJECTS 30 /* Max number of regex symbols in expression. */
#define MAX_CHAR_CLASS_LEN 40 /* Max length of character-class buffer. */


enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH */ };

/* One compiled regex symbol: a type tag plus a payload stored in an
 * anonymous union, so `ch` and `ccl` share storage and only one of them is
 * meaningful for a given node.
 * NOTE(review): presumably `type` selects which union member is valid
 * (ch for a literal, ccl for a character class) -- confirm in re_compile. */
typedef struct regex_t
{
unsigned char type; /* CHAR, STAR, etc. */
union
{
unsigned char ch; /* the character itself */
unsigned char* ccl; /* OR a pointer to characters in class */
};
} regex_t;

enum { UNUSED, DOT, BEGIN, END, QUESTIONMARK, STAR, PLUS, CHAR, CHAR_CLASS, INV_CHAR_CLASS, DIGIT, NOT_DIGIT, ALPHA, NOT_ALPHA, WHITESPACE, NOT_WHITESPACE, /* BRANCH, */ FAIL};


/* Private function declarations: */
Expand All @@ -70,12 +58,16 @@ static int ismetachar(char c);
/* Public functions: */
/* Compile `pattern` and match it against `text` in a single call; the result
 * is whatever re_matchp() returns for the compiled pattern.
 * NOTE(review): this hunk lists the old one-line body (re_compile(pattern))
 * together with its replacement (re_compile(regex, pattern)) without diff
 * markers; only one of the two belongs in the final function -- confirm
 * against the merged re.c. */
int re_match(const char* pattern, const char* text)
{
return re_matchp(re_compile(pattern), text);
re_t regex;

re_compile(regex, pattern);
return re_matchp(regex, text);
}

int re_matchp(re_t pattern, const char* text)
{
if (pattern != 0)
/* FAIL is a sentinel value indicating compilation of the pattern failed. */
if (pattern[0].type != FAIL)
{
if (pattern[0].type == BEGIN)
{
Expand Down Expand Up @@ -103,12 +95,11 @@ int re_matchp(re_t pattern, const char* text)
return -1;
}

re_t re_compile(const char* pattern)
int re_compile(re_t re_compiled, const char* pattern)
{
/* The sizes of the two static arrays below account for the static RAM usage of this module.
MAX_REGEXP_OBJECTS is the max number of symbols in the expression.
MAX_CHAR_CLASS_LEN determines the size of the buffer for chars in all char-classes in the expression. */
static regex_t re_compiled[MAX_REGEXP_OBJECTS];
static unsigned char ccl_buf[MAX_CHAR_CLASS_LEN];
int ccl_bufidx = 1;

Expand Down Expand Up @@ -193,22 +184,22 @@ re_t re_compile(const char* pattern)
if (ccl_bufidx >= MAX_CHAR_CLASS_LEN - 1)
{
//fputs("exceeded internal buffer!\n", stderr);
return 0;
goto fail;
}
ccl_buf[ccl_bufidx++] = pattern[i++];
}
else if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)
{
//fputs("exceeded internal buffer!\n", stderr);
return 0;
goto fail;
}
ccl_buf[ccl_bufidx++] = pattern[i];
}
if (ccl_bufidx >= MAX_CHAR_CLASS_LEN)
{
/* Catches cases such as [00000000000000000000000000000000000000][ */
//fputs("exceeded internal buffer!\n", stderr);
return 0;
goto fail;
}
/* Null-terminate string end */
ccl_buf[ccl_bufidx++] = 0;
Expand All @@ -228,12 +219,16 @@ re_t re_compile(const char* pattern)
/* 'UNUSED' is a sentinel used to indicate end-of-pattern */
re_compiled[j].type = UNUSED;

return (re_t) re_compiled;
return j;

fail:
re_compiled[0].type = FAIL;
return -1;
}

void re_print(regex_t* pattern)
{
const char* types[] = { "UNUSED", "DOT", "BEGIN", "END", "QUESTIONMARK", "STAR", "PLUS", "CHAR", "CHAR_CLASS", "INV_CHAR_CLASS", "DIGIT", "NOT_DIGIT", "ALPHA", "NOT_ALPHA", "WHITESPACE", "NOT_WHITESPACE", "BRANCH" };
const char* types[] = { "UNUSED", "DOT", "BEGIN", "END", "QUESTIONMARK", "STAR", "PLUS", "CHAR", "CHAR_CLASS", "INV_CHAR_CLASS", "DIGIT", "NOT_DIGIT", "ALPHA", "NOT_ALPHA", "WHITESPACE", "NOT_WHITESPACE", /* "BRANCH", */ "FAIL"};

int i;
for (i = 0; i < MAX_REGEXP_OBJECTS; ++i)
Expand Down
19 changes: 15 additions & 4 deletions re.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,25 @@
extern "C"{
#endif

#define MAX_REGEXP_OBJECTS 30 /* Max number of regex symbols in expression. */

/* One compiled regex symbol: a type tag plus a payload stored in an
 * anonymous union, so `ch` and `ccl` share storage and only one of them is
 * meaningful for a given node. An array of MAX_REGEXP_OBJECTS of these forms
 * the re_t type declared below.
 * NOTE(review): presumably `type` selects which union member is valid
 * (ch for a literal, ccl for a character class) -- confirm in re_compile. */
typedef struct regex_t
{
unsigned char type; /* CHAR, STAR, etc. */
union
{
unsigned char ch; /* the character itself */
unsigned char* ccl; /* OR a pointer to characters in class */
};
} regex_t;

/* Typedef'd pointer to get abstract datatype. */
typedef struct regex_t* re_t;
/* Typedef'd array to get abstract datatype. */
typedef regex_t re_t[MAX_REGEXP_OBJECTS];


/* Compile regex string pattern to a regex_t-array. */
re_t re_compile(const char* pattern);
/* Compile regex string pattern to a regex_t-array and copy to re_compiled.
* Return the number of regex objects copied upon success or -1 upon error. */
int re_compile(re_t re_compiled, const char* pattern);


/* Find matches of the compiled pattern inside text. */
Expand Down
7 changes: 5 additions & 2 deletions tests/test1.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ int main()
size_t ntests = sizeof(test_vector) / sizeof(*test_vector);
size_t nfailed = 0;
size_t i;
re_t regex;

for (i = 0; i < ntests; ++i)
{
Expand All @@ -114,7 +115,8 @@ int main()
if (m != (-1))
{
printf("\n");
re_print(re_compile(pattern));
re_compile(regex, pattern);
re_print(regex);
fprintf(stderr, "[%lu/%lu]: pattern '%s' matched '%s' unexpectedly. \n", (i+1), ntests, pattern, text);
nfailed += 1;
}
Expand All @@ -124,7 +126,8 @@ int main()
if (m == (-1))
{
printf("\n");
re_print(re_compile(pattern));
re_compile(regex, pattern);
re_print(regex);
fprintf(stderr, "[%lu/%lu]: pattern '%s' didn't match '%s' as expected. \n", (i+1), ntests, pattern, text);
nfailed += 1;
}
Expand Down