Skip to content

Commit c26ae52

Browse files
authored
Implement ExternalEntityLoader for http downloads (#469)
* Implement ExternalEntityLoader for http downloads * Experimental custom myExternalEntityLoader on libxml2 2.15 and up
1 parent ce0d8dd commit c26ae52

File tree

4 files changed

+52
-2
lines changed

4 files changed

+52
-2
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: xml2
22
Title: Parse XML
3-
Version: 1.4.1
3+
Version: 1.5.0
44
Authors@R: c(
55
person("Hadley", "Wickham", role = "aut"),
66
person("Jim", "Hester", role = "aut"),

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# xml2 1.5.0
2+
3+
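* Experimental: on libxml2 2.15 and up, xml2 installs a custom external entity loader (`myExternalEntityLoader`) that downloads `http://` and `https://` resources through R.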
4+
15
# xml2 1.4.1
26

37
* Remove a test that broke with libxml2 2.15

R/init.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,10 @@ xml_parse_options <- function() {
99
xml_save_options <- function() {
1010
.Call(xml_save_options_)
1111
}
12+
13+
# Download `url` to a temporary file and return its contents as a raw vector.
# The temporary file is removed when the function exits, including on error.
# An error raised by download.file() propagates to the caller.
download_file_callback <- function(url){
  tmp <- tempfile()
  on.exit(unlink(tmp), add = TRUE)
  # mode = "wb" keeps the payload byte-exact; the default text mode can
  # rewrite line endings on Windows before the bytes are read back below.
  download.file(url, tmp, quiet = TRUE, mode = "wb")
  readBin(tmp, raw(), file.size(tmp))
}

src/xml2_init.c

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
#include <libxml/parser.h>
55
#include <string.h>
66

7+
static xmlExternalEntityLoader defaultLoader = NULL;
8+
79
/* * *
810
* Author: Nick Wellnhofer <[email protected]>
911
* Date: Tue, 24 Oct 2023 15:02:36 +0200
@@ -49,12 +51,49 @@ void handleGenericError(void *ctx, const char *fmt, ...){
4951
Rf_error("%s", buffer);
5052
}
5153

54+
#if LIBXML_VERSION >= 21500
55+
56+
xmlParserInput *download_file_callback(const char *url){
57+
SEXP arg = PROTECT(Rf_mkString(url));
58+
SEXP expr = PROTECT(Rf_install("download_file_callback"));
59+
SEXP call = PROTECT(Rf_lang2(expr, arg));
60+
SEXP env = R_FindNamespace(Rf_mkString("xml2"));
61+
int err = 1;
62+
SEXP out = PROTECT(R_tryEvalSilent(call, env, &err));
63+
if(err) return NULL;
64+
xmlParserInputFlags flags = XML_INPUT_BUF_STATIC | XML_INPUT_USE_SYS_CATALOG;
65+
xmlParserInput *buf = xmlNewInputFromMemory(url, RAW(out), Rf_length(out), flags);
66+
//xmlParserInputBuffer *buf = xmlParserInputBufferCreateMem((char*) RAW(out), Rf_length(out), XML_CHAR_ENCODING_UTF8);
67+
UNPROTECT(4);
68+
return buf;
69+
}
70+
71+
/*
 * External entity loader installed in place of libxml2's default one.
 *
 * URLs starting with "http://" or "https://" are first handed to
 * download_file_callback(). If that yields no input, or the URL is NULL or
 * uses another scheme, the request is forwarded to the previously saved
 * default loader. Returns NULL when no default loader was saved.
 */
static xmlParserInputPtr myExternalEntityLoader(const char *URL, const char *ID, xmlParserCtxtPtr ctxt){
  int is_remote = URL != NULL &&
    (strncmp(URL, "http://", 7) == 0 || strncmp(URL, "https://", 8) == 0);

  if (is_remote) {
    xmlParserInput *input = download_file_callback(URL);
    if (input != NULL) {
      return input;
    }
  }

  /* Not remote, or the download failed: defer to the saved loader. */
  return defaultLoader != NULL ? defaultLoader(URL, ID, ctxt) : NULL;
}
82+
83+
#endif
84+
85+
5286
void init_libxml2_library(void) {
5387
// Check that header and libs are compatible
5488
LIBXML_TEST_VERSION
5589

5690
xmlInitParser();
5791
xmlSetStructuredErrorFunc(NULL, handleStructuredError);
5892
xmlSetGenericErrorFunc(NULL, handleGenericError);
59-
}
6093

94+
// Set custom download callback
95+
#if LIBXML_VERSION >= 21500
96+
defaultLoader = xmlGetExternalEntityLoader();
97+
xmlSetExternalEntityLoader(myExternalEntityLoader);
98+
#endif
99+
}

0 commit comments

Comments
 (0)