1414
1515#include "msc_xml.h"
1616
17+ static void msc_xml_on_start_elementns (
18+ void * ctx ,
19+ const xmlChar * localname ,
20+ const xmlChar * prefix ,
21+ const xmlChar * URI ,
22+ int nb_namespaces ,
23+ const xmlChar * * namespaces ,
24+ int nb_attributes ,
25+ int nb_defaulted ,
26+ const xmlChar * * attributes
27+ ) {
28+
29+ // get the length of XML tag (localname)
30+ size_t taglen = strlen ((const char * )localname );
31+ modsec_rec * msr = (modsec_rec * )ctx ;
32+ msc_xml_parser_state * xml_parser_state = msr -> xml -> xml_parser_state ;
33+
34+ // pathlen contains the concatenated strings of tags with '.'
35+ // eg xml.root.level1.leaf
36+ xml_parser_state -> pathlen += (taglen + 1 );
37+ char * newpath = apr_pstrcat (msr -> mp , xml_parser_state -> currpath , "." , (char * )localname , NULL );
38+ xml_parser_state -> currpath = newpath ;
39+
40+ int * new_stack_item = (int * )apr_array_push (xml_parser_state -> has_child_stack );
41+ * new_stack_item = 0 ;
42+ xml_parser_state -> depth ++ ;
43+ // set the current value to null
44+ // this is necessary because if there is any text between the tags (new line, etc)
45+ // it will be added to the current value
46+ xml_parser_state -> currval = NULL ;
47+
48+ // if there is an item before the current one we set that has a child
49+ if (xml_parser_state -> depth > 1 ) {
50+ int * parent_stack_item = & ((int * )xml_parser_state -> has_child_stack -> elts )[xml_parser_state -> has_child_stack -> nelts - 2 ];
51+ * parent_stack_item = 1 ;
52+ }
53+
54+ }
55+
56+ static void msc_xml_on_end_elementns (
57+ void * ctx ,
58+ const xmlChar * localname ,
59+ const xmlChar * prefix ,
60+ const xmlChar * URI
61+ ) {
62+
63+ size_t taglen = strlen ((const char * )localname );
64+ modsec_rec * msr = (modsec_rec * )ctx ;
65+ msc_xml_parser_state * xml_parser_state = msr -> xml -> xml_parser_state ;
66+
67+ // if the node is a leaf we add it as argument
68+ // get the top item from the stack which tells this info
69+ int * top_stack_item = apr_array_pop (xml_parser_state -> has_child_stack );
70+ if (* top_stack_item == 0 ) {
71+
72+ if (apr_table_elts (msr -> arguments )-> nelts >= msr -> txcfg -> arguments_limit ) {
73+ if (msr -> txcfg -> debuglog_level >= 4 ) {
74+ msr_log (msr , 4 , "Skipping request argument, over limit (XML): name \"%s\", value \"%s\"" ,
75+ log_escape_ex (msr -> mp , xml_parser_state -> currpath , strlen (xml_parser_state -> currpath )),
76+ log_escape_ex (msr -> mp , xml_parser_state -> currval , strlen (xml_parser_state -> currval )));
77+ }
78+ msr -> msc_reqbody_error = 1 ;
79+ msr -> xml -> xml_error = apr_psprintf (msr -> mp , "More than %ld ARGS (GET + XML)" , msr -> txcfg -> arguments_limit );
80+ xmlStopParser ((xmlParserCtxtPtr )msr -> xml -> parsing_ctx_arg );
81+ }
82+ else {
83+
84+ msc_arg * arg = (msc_arg * ) apr_pcalloc (msr -> mp , sizeof (msc_arg ));
85+
86+ arg -> name = xml_parser_state -> currpath ;
87+ arg -> name_len = strlen (arg -> name );
88+ arg -> value = xml_parser_state -> currval ;
89+ arg -> value_len = strlen (xml_parser_state -> currval );
90+ arg -> value_origin_len = arg -> value_len ;
91+ arg -> origin = "XML" ;
92+
93+ if (msr -> txcfg -> debuglog_level >= 9 ) {
94+ msr_log (msr , 9 , "Adding XML argument '%s' with value '%s'" ,
95+ xml_parser_state -> currpath , xml_parser_state -> currval );
96+ }
97+
98+ apr_table_addn (msr -> arguments ,
99+ log_escape_nq_ex (msr -> mp , arg -> name , arg -> name_len ), (void * ) arg );
100+ } // end else
101+ } // end top_stack_item == 0
102+
103+ // decrease the length of current path length - +1 because of the '\0'
104+ xml_parser_state -> pathlen -= (taglen + 1 );
105+
106+ // -1 is needed because we don't need the last '.'
107+ char * newpath = apr_pstrndup (msr -> mp , xml_parser_state -> currpath , xml_parser_state -> pathlen - 1 );
108+ xml_parser_state -> currpath = newpath ;
109+
110+ xml_parser_state -> depth -- ;
111+ xml_parser_state -> currval = NULL ;
112+ }
113+
114+ static void msc_xml_on_characters (void * ctx , const xmlChar * ch , int len ) {
115+
116+ modsec_rec * msr = (modsec_rec * )ctx ;
117+ msc_xml_parser_state * xml_parser_state = msr -> xml -> xml_parser_state ;
118+
119+ // libxml2 SAX parser will call this function multiple times
120+ // during the parsing of a single node, if the value has multibyte
121+ // characters, so we need to concatenate the values
122+ xml_parser_state -> currval = apr_pstrcat (msr -> mp ,
123+ ((xml_parser_state -> currval != NULL ) ? xml_parser_state -> currval : "" ),
124+ apr_pstrndup (msr -> mp , (const char * )ch , len ),
125+ NULL );
126+ // check if the memory allocation was successful
127+ if (xml_parser_state -> currval == NULL ) {
128+ msr -> xml -> xml_error = apr_psprintf (msr -> mp , "Failed to allocate memory for XML value." );
129+ xmlStopParser ((xmlParserCtxtPtr )msr -> xml -> parsing_ctx_arg );
130+ }
131+
132+ }
133+
134+
17135static xmlParserInputBufferPtr
18136xml_unload_external_entity (const char * URI , xmlCharEncoding enc ) {
19137 return NULL ;
@@ -37,6 +155,33 @@ int xml_init(modsec_rec *msr, char **error_msg) {
37155 entity = xmlParserInputBufferCreateFilenameDefault (xml_unload_external_entity );
38156 }
39157
158+ if (msr -> txcfg -> parse_xml_into_args != MSC_XML_ARGS_OFF ) {
159+
160+ msr -> xml -> sax_handler = (xmlSAXHandler * )apr_pcalloc (msr -> mp , sizeof (xmlSAXHandler ));
161+ memset (msr -> xml -> sax_handler , 0 , sizeof (xmlSAXHandler ));
162+ if (msr -> xml -> sax_handler == NULL ) {
163+ * error_msg = apr_psprintf (msr -> mp , "XML: Failed to create SAX handler." );
164+ return -1 ;
165+ }
166+
167+ msr -> xml -> sax_handler -> initialized = XML_SAX2_MAGIC ;
168+ msr -> xml -> sax_handler -> startElementNs = msc_xml_on_start_elementns ;
169+ msr -> xml -> sax_handler -> endElementNs = msc_xml_on_end_elementns ;
170+ msr -> xml -> sax_handler -> characters = msc_xml_on_characters ;
171+
172+ // set the parser state struct
173+ msr -> xml -> xml_parser_state = apr_pcalloc (msr -> mp , sizeof (msc_xml_parser_state ));
174+ msr -> xml -> xml_parser_state -> depth = 0 ;
175+ msr -> xml -> xml_parser_state -> pathlen = 4 ; // "xml\0"
176+ msr -> xml -> xml_parser_state -> currpath = apr_pstrdup (msr -> mp , "xml" );
177+ msr -> xml -> xml_parser_state -> currval = NULL ;
178+ msr -> xml -> xml_parser_state -> currpathbufflen = 4 ;
179+ // initialize the stack with item of 10
180+ // this will store the information about nodes
181+ // 10 is just an initial value, it can be automatically incremented
182+ msr -> xml -> xml_parser_state -> has_child_stack = apr_array_make (msr -> mp , 10 , sizeof (int ));
183+ }
184+
40185 return 1 ;
41186}
42187
@@ -68,7 +213,7 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char
68213 * enable us to pass it the first chunk of data so that
69214 * it can attempt to auto-detect the encoding.
70215 */
71- if (msr -> xml -> parsing_ctx == NULL ) {
216+ if (msr -> xml -> parsing_ctx == NULL && msr -> xml -> parsing_ctx_arg == NULL ) {
72217
73218 /* First invocation. */
74219
@@ -86,18 +231,52 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char
86231
87232 */
88233
89- msr -> xml -> parsing_ctx = xmlCreatePushParserCtxt (NULL , NULL , buf , size , "body.xml" );
90- if (msr -> xml -> parsing_ctx == NULL ) {
91- * error_msg = apr_psprintf (msr -> mp , "XML: Failed to create parsing context." );
92- return -1 ;
234+ if (msr -> txcfg -> parse_xml_into_args != MSC_XML_ARGS_ONLYARGS ) {
235+ msr -> xml -> parsing_ctx = xmlCreatePushParserCtxt (NULL , NULL , buf , size , "body.xml" );
236+ if (msr -> xml -> parsing_ctx == NULL ) {
237+ * error_msg = apr_psprintf (msr -> mp , "XML: Failed to create parsing context." );
238+ return -1 ;
239+ }
240+ }
241+ if (msr -> txcfg -> parse_xml_into_args != MSC_XML_ARGS_OFF ) {
242+ msr -> xml -> parsing_ctx_arg = xmlCreatePushParserCtxt (
243+ msr -> xml -> sax_handler ,
244+ msr ,
245+ buf ,
246+ size ,
247+ NULL );
248+ if (msr -> xml -> parsing_ctx_arg == NULL ) {
249+ * error_msg = apr_psprintf (msr -> mp , "XML: Failed to create parsing context for ARGS." );
250+ return -1 ;
251+ }
93252 }
94253 } else {
95254
96255 /* Not a first invocation. */
256+ msr_log (msr , 4 , "XML: Continue parsing." );
257+ if (msr -> xml -> parsing_ctx != NULL &&
258+ msr -> txcfg -> parse_xml_into_args != MSC_XML_ARGS_ONLYARGS ) {
259+ xmlParseChunk (msr -> xml -> parsing_ctx , buf , size , 0 );
260+ if (msr -> xml -> parsing_ctx -> wellFormed != 1 ) {
261+ * error_msg = apr_psprintf (msr -> mp , "XML: Failed to parse document." );
262+ return -1 ;
263+ }
264+ }
97265
98- xmlParseChunk (msr -> xml -> parsing_ctx , buf , size , 0 );
99- if (msr -> xml -> parsing_ctx -> wellFormed != 1 ) {
100- * error_msg = apr_psprintf (msr -> mp , "XML: Failed parsing document." );
266+ if (msr -> xml -> parsing_ctx_arg != NULL &&
267+ msr -> txcfg -> parse_xml_into_args != MSC_XML_ARGS_OFF ) {
268+ if (xmlParseChunk (msr -> xml -> parsing_ctx_arg , buf , size , 0 ) != 0 ) {
269+ if (msr -> xml -> xml_error ) {
270+ * error_msg = msr -> xml -> xml_error ;
271+ }
272+ else {
273+ * error_msg = apr_psprintf (msr -> mp , "XML: Failed to parse document for ARGS." );
274+ }
275+ return -1 ;
276+ }
277+ }
278+ if (msr -> xml -> xml_error ) {
279+ * error_msg = msr -> xml -> xml_error ;
101280 return -1 ;
102281 }
103282 }
@@ -114,23 +293,44 @@ int xml_complete(modsec_rec *msr, char **error_msg) {
114293 * error_msg = NULL ;
115294
116295 /* Only if we have a context, meaning we've done some work. */
117- if (msr -> xml -> parsing_ctx != NULL ) {
118- /* This is how we signalise the end of parsing to libxml. */
119- xmlParseChunk (msr -> xml -> parsing_ctx , NULL , 0 , 1 );
296+ if (msr -> xml -> parsing_ctx != NULL || msr -> xml -> parsing_ctx_arg != NULL ) {
297+ if (msr -> xml -> parsing_ctx != NULL &&
298+ msr -> txcfg -> parse_xml_into_args != MSC_XML_ARGS_ONLYARGS ) {
299+ /* This is how we signal the end of parsing to libxml. */
300+ xmlParseChunk (msr -> xml -> parsing_ctx , NULL , 0 , 1 );
120301
121- /* Preserve the results for our reference. */
122- msr -> xml -> well_formed = msr -> xml -> parsing_ctx -> wellFormed ;
123- msr -> xml -> doc = msr -> xml -> parsing_ctx -> myDoc ;
302+ /* Preserve the results for our reference. */
303+ msr -> xml -> well_formed = msr -> xml -> parsing_ctx -> wellFormed ;
304+ msr -> xml -> doc = msr -> xml -> parsing_ctx -> myDoc ;
124305
125- /* Clean up everything else. */
126- xmlFreeParserCtxt (msr -> xml -> parsing_ctx );
127- msr -> xml -> parsing_ctx = NULL ;
128- msr_log (msr , 4 , "XML: Parsing complete (well_formed %u)." , msr -> xml -> well_formed );
306+ /* Clean up everything else. */
307+ xmlFreeParserCtxt (msr -> xml -> parsing_ctx );
308+ msr -> xml -> parsing_ctx = NULL ;
309+ msr_log (msr , 4 , "XML: Parsing complete (well_formed %u)." , msr -> xml -> well_formed );
129310
130- if (msr -> xml -> well_formed != 1 ) {
131- * error_msg = apr_psprintf (msr -> mp , "XML: Failed parsing document." );
132- return -1 ;
311+ if (msr -> xml -> well_formed != 1 ) {
312+ * error_msg = apr_psprintf (msr -> mp , "XML: Failed to parse document." );
313+ return -1 ;
314+ }
315+ }
316+
317+ if (msr -> xml -> parsing_ctx_arg != NULL &&
318+ msr -> txcfg -> parse_xml_into_args != MSC_XML_ARGS_OFF ) {
319+ if (xmlParseChunk (msr -> xml -> parsing_ctx_arg , NULL , 0 , 1 ) != 0 ) {
320+ if (msr -> xml -> xml_error ) {
321+ * error_msg = msr -> xml -> xml_error ;
322+ }
323+ else {
324+ * error_msg = apr_psprintf (msr -> mp , "XML: Failed to parse document for ARGS." );
325+ }
326+ xmlFreeParserCtxt (msr -> xml -> parsing_ctx_arg );
327+ msr -> xml -> parsing_ctx_arg = NULL ;
328+ return -1 ;
329+ }
330+ xmlFreeParserCtxt (msr -> xml -> parsing_ctx_arg );
331+ msr -> xml -> parsing_ctx_arg = NULL ;
133332 }
333+
134334 }
135335
136336 return 1 ;
@@ -152,6 +352,15 @@ apr_status_t xml_cleanup(modsec_rec *msr) {
152352 xmlFreeParserCtxt (msr -> xml -> parsing_ctx );
153353 msr -> xml -> parsing_ctx = NULL ;
154354 }
355+ if (msr -> xml -> parsing_ctx_arg != NULL ) {
356+
357+ if (msr -> xml -> parsing_ctx_arg -> myDoc ) {
358+ xmlFreeDoc (msr -> xml -> parsing_ctx_arg -> myDoc );
359+ }
360+
361+ xmlFreeParserCtxt (msr -> xml -> parsing_ctx_arg );
362+ msr -> xml -> parsing_ctx_arg = NULL ;
363+ }
155364 if (msr -> xml -> doc != NULL ) {
156365 xmlFreeDoc (msr -> xml -> doc );
157366 msr -> xml -> doc = NULL ;
0 commit comments