-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathpreg.c
537 lines (481 loc) · 16.2 KB
/
preg.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
/*
* Copyright (C) 2007-2013 Rich Waters <[email protected]>
*
* This file is part of lib_mysqludf_preg.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/** @file preg.c
*
* @brief Provides some functions that are shared by the various
* preg udf functions.
*/
#include "ghmysql.h"
#include "preg.h"
/* For pthreads */
#include <pthread.h>
/*
* Public Functions:
*/
/**
 * @fn pcre *pregCompileRegexArg( UDF_ARGS *args , char *msg , int msglen )
 *
 * @brief compile the regex passed as the first UDF argument (args->args[0])
 *
 * @param args   - the args supplied by mysql udf api (ultimately, the user)
 * @param msg    - buffer where error messages can be placed
 * @param msglen - size of the error message buffer above
 *
 * @return - if successful - the compiled regular expression
 * @return - if failure - NULL, with an explanation left in msg
 *
 * @details
 * The argument passed as args->args[0] is a pattern that needs to include
 * delimiters and may include modifiers (ie. /([a-z0-9]*?)(.*)/i ).
 * compileRegex (from_php.c) requires a string argument, so the argument
 * is first duplicated with ghargdup and that copy is handed to
 * compileRegex together with args->lengths[0].  The copy is released
 * before returning.
 *
 * msg is always reset to the empty string on entry.  Messages written
 * by this function itself are always NUL-terminated, even when they
 * have to be truncated to fit msglen.
 *
 * @note
 * make sure to call pcre_free to free up the returned result (if not null)
 */
pcre *pregCompileRegexArg( UDF_ARGS *args , char *msg , int msglen )
{
    pcre *re ;              /* the compiled pattern */
    char *val ;             /* private copy of the pattern to compile */
    const char *reason ;    /* why the pattern could not be duplicated */

    *msg = '\0' ;

    val = ghargdup( args , 0 ) ;
    if( !val )
    {
        // A non-empty argument that could not be duplicated means the
        // allocation failed; otherwise there was nothing to duplicate.
        if( args->lengths[0] && args->args[0] )
        {
            reason = "Out of memory" ;
        }
        else
        {
            reason = "Empty pattern" ;
        }

        // strncpy does not terminate on truncation (and a non-positive
        // msglen would turn into a huge size_t), so guard and terminate.
        if( msglen > 0 )
        {
            strncpy( msg , reason , msglen ) ;
            msg[ msglen - 1 ] = '\0' ;
        }
        return NULL ;
    }

    re = compileRegex( val , args->lengths[0] , msg , msglen ) ;
    free( val ) ;

    return re ;
}
/**
 * @fn int initPtrInfo( struct preg_s *ptr ,UDF_ARGS *args,char *message )
 *
 * @brief fill in the compiled-pattern member of initid->ptr
 *
 * @param ptr     - the structure whose re member is to be set
 * @param args    - the args supplied by mysql udf api (ultimately, the user)
 * @param message - receives the error message if compilation fails
 *
 * @return 0 - on success
 * @return 1 - on error (ptr->re is NULL)
 *
 * @details
 * Compiles the pattern with pregCompileRegexArg and stores the result
 * in ptr->re.  This function should normally only be called if the
 * first argument is a constant.
 *
 * @note
 * make sure to call destroyPtrInfo when done
 */
int initPtrInfo( struct preg_s *ptr ,UDF_ARGS *args,char *message )
{
    // 128 is a safe size for mysql, which recommends messages of
    // 80 chars or less
    ptr->re = pregCompileRegexArg( args , message , 128 ) ;

    return ptr->re ? 0 : 1 ;
}
/**
 * @fn int *pregCreateOffsetsVector( pcre *re , pcre_extra *extra , int *count,
 *                                   char *msg , int msglen )
 *
 * @brief allocate a memory area that can be used for holding the offset vector
 *        used by the pcre library.
 *
 * @param re     - compiled regular expression
 * @param extra  - NULL or extra info about re as returned by pcre_study
 * @param count  - receives the number of ints in the returned vector
 *                 (0 on failure)
 * @param msg    - put error messages here
 * @param msglen - length of msg buffer
 *
 * @return pointer to new offsets vector - on success
 * @return NULL if out of memory or error from pcre
 *
 * @details The ovector is used by pcre for capturing the offsets of
 * the parenthesized sub-expressions of a pcre expression.  This function
 * uses pcre_fullinfo to retrieve the number of capture groups in the
 * expression and allocates (groups + 1) * 3 ints: one extra slot for the
 * whole match, two ints of offset info per slot and one more per slot
 * for pcre internals.  *count is set to that number of ints.
 *
 * Error messages are always NUL-terminated, even when truncated.
 *
 * @note
 * the vector is obtained from malloc; the caller releases it with free.
 */
int *pregCreateOffsetsVector( pcre *re , pcre_extra *extra , int *count ,
                              char *msg , int msglen )
{
    int *ovec = NULL ;              /* vector to return */
    int oveccount ;                 /* number of capture groups found */
    const char *failure = NULL ;    /* set when something goes wrong */

    *count = 0 ;

    if( pcre_fullinfo( re , extra , PCRE_INFO_CAPTURECOUNT , &oveccount ) < 0 )
    {
        failure = "preg: error retrieving information about pattern" ;
    }
    else
    {
        ++oveccount ;       // for 0
        oveccount *= 3 ;    // 2 for offset info , 1 for pcre internals

        ovec = malloc( sizeof( int ) * oveccount ) ;
        if( !ovec )
        {
            failure = "preg: out of memory" ;
        }
    }

    if( failure )
    {
        // strncpy alone would leave msg unterminated on truncation.
        if( msglen > 0 )
        {
            strncpy( msg , failure , msglen ) ;
            msg[ msglen - 1 ] = '\0' ;
        }
        return NULL ;
    }

    *count = oveccount ;
    return ovec ;
}
/**
 * @fn int pregGetGroupNum( pcre *re , UDF_ARGS *args , int argnum )
 *
 * @brief gets the string number of a capture group from a pcre
 *
 * @param re     - compiled regular expression
 * @param args   - the args to the mysql UDF
 * @param argnum - the index of the args that specifies the group number to get
 *
 * @return int >= 0 string number that can be passed to pcre_get_substring
 * @return negative value if the group could not be determined: -1 when
 *         the argument cannot be read, otherwise whatever
 *         pcre_get_stringnumber reports for an unknown name
 *
 * @details This function extracts the desired group number from the
 * argument at index argnum.  If that argument was not supplied at all
 * (argnum is past the end of the argument list) group 0 is used.  An
 * INT_RESULT argument is taken as the group number directly.  Anything
 * else is treated as the name of a capture group and converted to a
 * number using pcre_get_stringnumber.
 */
int pregGetGroupNum( pcre *re , UDF_ARGS *args , int argnum )
{
    char *group ;       /* named group - copy of args[argnum] */
    int groupnum ;      /* string number of capture group */

    // The groupnum was specified as an optional parameter
    if( argnum >= args->arg_count )
    {
        return 0 ;
    }

    if( args->arg_type[argnum] == INT_RESULT )
    {
        // numeric capture group; there is no value to read when the
        // argument pointer is NULL
        if( !args->args[argnum] )
        {
            return -1 ;
        }
        return (int)(*(longlong *)args->args[argnum]) ;
    }

    // This is a named group.  The numeric groupnum must be found
    group = ghargdup( args , argnum ) ;
    if( !group )
    {
        fprintf(stderr,"pregGetGroupNum: error accessing capture group\n");
        return -1 ;
    }

    groupnum = pcre_get_stringnumber( re , group ) ;
    free( group ) ;

    return groupnum ;
}
/**
 * @fn char *pregSkipToOccurence( pcre *re , char *subject , int subject_len ,
 *                                int *ovector , int oveccount , int occurence,
 *                                int *rc)
 *
 * @brief return a pointer to the nth occurence of a pcre in a string
 *
 * @param re          - compiled regular expression
 * @param subject     - the string on which to perform matching
 * @param subject_len - length of the subject string
 * @param ovector     - vector used by pcre to capture offsets of matches
 * @param oveccount   - size of ovector
 * @param occurence   - match occurence to find
 * @param rc          - receives the result of the last pcre_exec call
 *                      (0 if pcre_exec was never called)
 *
 * @return char * - the position in subject that the last, successful,
 *                  pcre_exec call was started from
 * @return NULL   - if the last pcre_exec call did not succeed, or if no
 *                  call was made at all
 *
 * @details pcre_exec is run repeatedly, each time starting at the end of
 * the previous match, until the requested number of matches has been
 * seen or a call fails.  Every call is handed the remaining part of the
 * subject as if it were a complete string, so on success the offsets in
 * ovector are relative to the returned pointer rather than to subject.
 */
char *pregSkipToOccurence( pcre *re , char *subject , int subject_len ,
                           int *ovector , int oveccount , int occurence,
                           int *rc)
{
    char *ex_subject = subject ;    /* start position of last match attempt */
    int subject_offset = 0 ;        /* where the next attempt starts */
    pcre_extra extra ;

    memset( &extra , 0 , sizeof( extra ) ) ;
    pregSetLimits( &extra ) ;

    // Give *rc a defined value so the check after the loop (and the
    // caller) never reads garbage when the loop body does not run.
    *rc = 0 ;

    // Skip over the 1st N occurences
    while( occurence-- && subject_offset <= subject_len )
    {
        *rc = pcre_exec( re , &extra , subject + subject_offset ,
                         subject_len - subject_offset , 0 , 0 ,
                         ovector , oveccount ) ;
        if( *rc <= 0 )
        {
            break ;
        }

        ex_subject = subject + subject_offset ;
        subject_offset += ovector[1] ;
    }

    // The status lives in *rc; testing the pointer itself is always true.
    return ( *rc > 0 ) ? ex_subject : NULL ;
}
/**
 * @fn void destroyPtrInfo( struct preg_s *ptr )
 *
 * @brief release the members of ptr that were allocated during init
 *
 * @param ptr - free members of this struct
 *
 * @details The compiled pattern is handed to pcre_free and the return
 * buffer to free.  Both members are left NULL, and the struct itself is
 * not freed.
 */
void destroyPtrInfo( struct preg_s *ptr )
{
    if( ptr->re != NULL )
    {
        pcre_free( ptr->re ) ;
    }
    ptr->re = NULL ;

    // free(NULL) is a no-op, so no guard is needed here
    free( ptr->return_buffer ) ;
    ptr->return_buffer = NULL ;
}
/**
 * @fn void pregDeInit(UDF_INIT *initid)
 *
 * @brief shared _deinit routine used by the preg functions.
 *
 * @param initid - various info supplied by mysql api - read more at
 *                 http://dev.mysql.com/doc/refman/5.0/en/adding-udf.html
 *
 * @details - releases the members of initid->ptr, then the structure
 * itself, and finally resets initid->ptr to NULL.  Nothing happens when
 * initid->ptr is already NULL.  It can usually be the only thing called
 * by the _deinit functions of the preg routines.
 */
void pregDeInit(UDF_INIT *initid)
{
    struct preg_s *info = (struct preg_s *)initid->ptr ;

    if( !info )
    {
        return ;
    }

    destroyPtrInfo( info ) ;
    free( info ) ;
    initid->ptr = NULL ;
}
/**
 * @fn bool pregInit(UDF_INIT *initid, UDF_ARGS *args, char *message)
 *
 * @brief
 *     Perform the init stuff common to all preg routines
 *
 * @param initid  - various info supplied by mysql api - read more at
 *                  http://dev.mysql.com/doc/refman/5.0/en/adding-udf.html
 *
 * @param args    - array of information about arguments from the SQL call
 *                  See file documentation for the description of the SQL
 *                  arguments
 *
 * @param message - for error messages.  Should be <80 but can be 255.
 *
 * @return 0 - on success
 * @return 1 - on error; message holds the reason and initid->ptr is NULL
 *
 * @details This function is called from the _init routines for the preg
 * functions.  It performs the initializations common to all or most of
 * those routines:
 *  - allocates the zeroed struct preg_s stored in initid->ptr,
 *  - asks for the 1st 2 args (pattern & subject) as strings,
 *  - allocates the return buffer, sized from initid->max_length,
 *  - compiles the first argument (the pattern) if it is a constant.
 *
 * Everything allocated here is released again before an error is
 * returned, so a failed init does not leave memory behind and does not
 * depend on pregDeInit being called afterwards.
 */
bool pregInit(UDF_INIT *initid, UDF_ARGS *args, char *message)
{
    struct preg_s *ptr ;    /* temp holder of initid->ptr */
    int i ;

    // use calloc so deInit can check for NULL's before freeing
    initid->ptr = (char *)calloc( 1 , sizeof( struct preg_s ) ) ;
    ptr = (struct preg_s *)initid->ptr ;
    if( !ptr )
    {
        strcpy(message,"not enough memory");
        return 1;
    }

    if( ghargIsNullConstant( args , 0 ) )
    {
        // NOTE(review): unless the build below returns early, this value
        // is overwritten by the if/else further down -- confirm intent.
        ptr->constant_pattern = 1 ;
#ifdef GH_1_0_NULL_HANDLING
        strcpy( message, "NULL pattern" ) ;
        goto fail ;
#endif
    }

    // Convert first 2 args (pattern & subject) to strings, without
    // writing past the end of arg_type when fewer args were given.
    for( i = 0 ; i < 2 && i < (int)args->arg_count ; i++ )
    {
        args->arg_type[i] = STRING_RESULT ;
    }

    if( ((int)initid->max_length) > 0 )
    {
        ptr->return_buffer_size = initid->max_length + 1 ;
    }
    else
    {
        // If there is no limit on max_length.  Start at a fairly big
        // size.  Re-allocations will occur if necessary.
        ptr->return_buffer_size = 1024000 ;
    }

    ptr->return_buffer = malloc( ptr->return_buffer_size ) ;
    if( !ptr->return_buffer )
    {
        strcpy(message,"not enough memory");
        goto fail ;
    }

    // The pattern is compiled last: when it fails ptr->re is NULL, so
    // the cleanup below only ever has plain malloc'ed memory to release.
    if( args->arg_count && args->args[0] )
    {
        if( initPtrInfo( ptr , args , message ) )
        {
            goto fail ;
        }
        /**
         * If the pattern is constant, compile it once to improve performance.
         * Set the constant_pattern member to inform main function.
         */
        ptr->constant_pattern = 1 ;
    }
    else
    {
        ptr->constant_pattern = 0 ;
    }

    return 0 ;

fail:
    free( ptr->return_buffer ) ;
    free( ptr ) ;
    initid->ptr = NULL ;
    return 1 ;
}
/**
 * @fn int pregCopyToReturnBuffer( struct preg_s *ptr , char *s , int l )
 *
 * @brief
 *     safely copies data into ptr->return_buffer
 *
 * @param ptr - the info stored in initid->ptr
 * @param s   - data to be copied
 * @param l   - length of data to be copied
 *
 * @return the number of bytes copied - on success
 * @return -1 - on error (negative length, missing data or out of memory)
 *
 * @details This function checks to see if ptr->return_buffer exists and
 * is big enough to hold the given data plus a terminator.  If it isn't,
 * a new buffer of exactly l + 1 bytes replaces the old one and
 * ptr->return_buffer_size is updated.  Then the data is copied.  On
 * error the existing buffer is left untouched.
 *
 * @note
 * The return buffer is null-terminated, as well.  This shouldn't be
 * necessary, but it can help to prevent potential crashes.
 */
int pregCopyToReturnBuffer( struct preg_s *ptr , char *s , int l )
{
    char *newbuf ;

    // A negative length would turn into a huge size for memcpy, and
    // there is nothing to read from when s is NULL.
    if( l < 0 || ( !s && l > 0 ) )
    {
        return -1 ;
    }

    if( !ptr->return_buffer || (l+1) > ptr->return_buffer_size )
    {
        newbuf = malloc( l + 1 ) ;
        if( !newbuf )
        {
            fprintf( stderr ,
                     "preg: out of memory reallocing return buffer\n" ) ;
            return -1 ;
        }

        // The old contents are not needed, so no realloc-style copy.
        free( ptr->return_buffer ) ;
        ptr->return_buffer = newbuf ;
        ptr->return_buffer_size = l + 1 ;
    }

    if( l > 0 )
    {
        memcpy( ptr->return_buffer , s , l ) ;
    }
    ptr->return_buffer[ l ] = 0 ;

    return l ;
}
/**
 * @fn char *pregMoveToReturnValues( UDF_INIT *initid ,
 *                                   unsigned long *length ,
 *                                   char *is_null , char *error ,
 *                                   char *s , int s_len )
 *
 * @brief
 *     set the appropriate UDF return values to the given data for UDF's
 *     that return strings, and frees passed in data pointer.
 *
 * @param initid  - various info supplied by mysql api - read more at
 *                  http://dev.mysql.com/doc/refman/5.0/en/adding-udf.html
 *
 * @param length  - put the length of the returned string here.
 * @param is_null - set this if return value is null and NULL is allowed
 * @param error   - set if an error occurs
 * @param s       - string to copy into return buffer
 * @param s_len   - length of string to copy into return buffer, or a
 *                  negative pcre error code
 *
 * @return - pointer to the string to return (ptr->return_buffer)
 * @return - NULL - if *is_null ends up set, or if there is no return
 *                  buffer to hand back
 *
 * @details The outputs start out describing a failure: *error is 1,
 * *length is 0 and, when initid->maybe_null is set, *is_null is 1.
 *  - s_len < 0: the value is logged as a pcre_exec error and the failure
 *    values are kept.  s is not touched.
 *  - s_len >= 0 and s is NULL: treated as an empty, non-error result.
 *  - s_len >= 0 and s is not NULL: the data is copied with
 *    pregCopyToReturnBuffer; if that works *length, *is_null and *error
 *    are updated to describe the copied string.
 *
 * @note This function frees the passed in string whenever s_len >= 0,
 * whether or not the copy succeeded.  Careful!
 */
char *pregMoveToReturnValues( UDF_INIT *initid ,
                              unsigned long *length ,
                              char *is_null , char *error ,
                              char *s , int s_len )
{
    struct preg_s *ptr ;    /* local holder of initid->ptr */
    int l ;                 /* bytes copied into return_buffer */

    ptr = (struct preg_s *)initid->ptr ;

    // Set default return info.
    *error = 1 ;
    *length = 0 ;
    if( initid->maybe_null )
    {
        *is_null = 1 ;
    }

    // Without a return buffer there is nowhere to put the result.
    // Report the failure instead of writing through a NULL pointer,
    // while still honouring the promise to release s.
    if( !ptr || !ptr->return_buffer )
    {
        if( s_len >= 0 )
        {
            free( s ) ;
        }
        return NULL ;
    }

    *ptr->return_buffer = '\0';

    if( s_len >= 0 )
    {
        if( s )
        {   // normal case -- copy data & free it
            l = pregCopyToReturnBuffer( ptr , s , s_len ) ;
            if( l >= 0 )
            {
                *is_null = 0 ;
                *error = 0 ;
                *length = l ;
            }
            free( s ) ;
        }
        else
        {   // Empty string is not error?
            *is_null = 0 ;
            *error = 0 ;
        }
    }
    else
    {
        ghlogprintf("ERROR preg: pcre_exec rturned error %d (%s)\n" , s_len, pregExecErrorString(s_len) ) ;
    }

    if( *is_null )
    {
        return NULL ;
    }

    return ptr->return_buffer ;
}