diff --git a/Units/fuzz-hitting-assertions.r/issue-4342.d/args.ctags b/Units/fuzz-hitting-assertions.r/issue-4342.d/args.ctags new file mode 100644 index 0000000000..c6e1f305b7 --- /dev/null +++ b/Units/fuzz-hitting-assertions.r/issue-4342.d/args.ctags @@ -0,0 +1 @@ +--sort=no \ No newline at end of file diff --git a/Units/fuzz-hitting-assertions.r/issue-4342.d/expected.tags b/Units/fuzz-hitting-assertions.r/issue-4342.d/expected.tags new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Units/fuzz-hitting-assertions.r/issue-4342.d/input-iconv.sql b/Units/fuzz-hitting-assertions.r/issue-4342.d/input-iconv.sql new file mode 100644 index 0000000000..dfde3f7b21 Binary files /dev/null and b/Units/fuzz-hitting-assertions.r/issue-4342.d/input-iconv.sql differ diff --git a/Units/fuzz-hitting-assertions.r/issue-4342.d/input-utf16be.sql b/Units/fuzz-hitting-assertions.r/issue-4342.d/input-utf16be.sql new file mode 100644 index 0000000000..a0aef7dd95 Binary files /dev/null and b/Units/fuzz-hitting-assertions.r/issue-4342.d/input-utf16be.sql differ diff --git a/Units/fuzz-hitting-assertions.r/issue-4342.d/input-utf16le.sql b/Units/fuzz-hitting-assertions.r/issue-4342.d/input-utf16le.sql new file mode 100644 index 0000000000..3c3722a93d Binary files /dev/null and b/Units/fuzz-hitting-assertions.r/issue-4342.d/input-utf16le.sql differ diff --git a/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/.gitattributes b/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/.gitattributes new file mode 100644 index 0000000000..f11e3da219 --- /dev/null +++ b/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/.gitattributes @@ -0,0 +1 @@ +input.cpp binary \ No newline at end of file diff --git a/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/args.ctags b/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/args.ctags new file mode 100644 index 0000000000..c6e1f305b7 --- /dev/null +++ b/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/args.ctags @@ -0,0 +1 @@ +--sort=no \ No newline at end of file 
diff --git a/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/expected.tags b/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/expected.tags new file mode 100644 index 0000000000..97d967b1ba --- /dev/null +++ b/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/expected.tags @@ -0,0 +1,2 @@ +globalVar input.cpp /^int globalVar;$/;" v typeref:typename:int +BETestClass input.cpp /^class BETestClass {$/;" c file: diff --git a/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/input.cpp b/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/input.cpp new file mode 100644 index 0000000000..e17187b35f Binary files /dev/null and b/Units/fuzz-hitting-assertions.r/utf16-be-bom.d/input.cpp differ diff --git a/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/.gitattributes b/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/.gitattributes new file mode 100644 index 0000000000..f11e3da219 --- /dev/null +++ b/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/.gitattributes @@ -0,0 +1 @@ +input.cpp binary \ No newline at end of file diff --git a/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/args.ctags b/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/args.ctags new file mode 100644 index 0000000000..c6e1f305b7 --- /dev/null +++ b/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/args.ctags @@ -0,0 +1 @@ +--sort=no \ No newline at end of file diff --git a/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/expected.tags b/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/expected.tags new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/input.cpp b/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/input.cpp new file mode 100644 index 0000000000..07b0808641 Binary files /dev/null and b/Units/fuzz-hitting-assertions.r/utf16-conversion-failure.d/input.cpp differ diff --git a/Units/fuzz-hitting-assertions.r/utf16-coverage.d/.gitattributes 
b/Units/fuzz-hitting-assertions.r/utf16-coverage.d/.gitattributes new file mode 100644 index 0000000000..f11e3da219 --- /dev/null +++ b/Units/fuzz-hitting-assertions.r/utf16-coverage.d/.gitattributes @@ -0,0 +1 @@ +input.cpp binary \ No newline at end of file diff --git a/Units/fuzz-hitting-assertions.r/utf16-coverage.d/args.ctags b/Units/fuzz-hitting-assertions.r/utf16-coverage.d/args.ctags new file mode 100644 index 0000000000..c6e1f305b7 --- /dev/null +++ b/Units/fuzz-hitting-assertions.r/utf16-coverage.d/args.ctags @@ -0,0 +1 @@ +--sort=no \ No newline at end of file diff --git a/Units/fuzz-hitting-assertions.r/utf16-coverage.d/expected.tags b/Units/fuzz-hitting-assertions.r/utf16-coverage.d/expected.tags new file mode 100644 index 0000000000..190ce3aa5e --- /dev/null +++ b/Units/fuzz-hitting-assertions.r/utf16-coverage.d/expected.tags @@ -0,0 +1,7 @@ +TestStruct input.cpp /^struct TestStruct {$/;" s file: +field1 input.cpp /^ int field1;$/;" m struct:TestStruct typeref:typename:int file: +field2 input.cpp /^ char* field2;$/;" m struct:TestStruct typeref:typename:char * file: +methodA input.cpp /^ void methodA() {}$/;" f struct:TestStruct typeref:typename:void file: +TestClass input.cpp /^class TestClass {$/;" c file: +staticVar input.cpp /^ static int staticVar;$/;" m class:TestClass typeref:typename:int file: +privateField input.cpp /^ int privateField;$/;" m class:TestClass typeref:typename:int file: diff --git a/Units/fuzz-hitting-assertions.r/utf16-coverage.d/input.cpp b/Units/fuzz-hitting-assertions.r/utf16-coverage.d/input.cpp new file mode 100644 index 0000000000..72463a9801 Binary files /dev/null and b/Units/fuzz-hitting-assertions.r/utf16-coverage.d/input.cpp differ diff --git a/main/read.c b/main/read.c index 2118ec1e99..16f152514d 100644 --- a/main/read.c +++ b/main/read.c @@ -34,6 +34,7 @@ #ifdef HAVE_ICONV # include "mbcs.h" # include "mbcs_p.h" +# include <iconv.h> #endif /* @@ -881,8 +882,32 @@ static MIO *getMioFull (const char *const fileName, const 
char *const openMode, if (mtime) *mtime = st->mtime; eStatFree (st); - if ((!memStreamRequired) - && (size > MAX_IN_MEMORY_FILE_SIZE || size == 0)) + + /* Always use memory stream for UTF-16 files to enable conversion */ + bool forceMemStream = false; +#ifdef HAVE_ICONV + if (size >= 2) + { + FILE *peek = fopen(fileName, openMode); + if (peek) + { + unsigned char bom[2]; + if (fread(bom, 1, 2, peek) == 2) + { + /* Check for UTF-16 BOM */ + if ((bom[0] == 0xFF && bom[1] == 0xFE) || + (bom[0] == 0xFE && bom[1] == 0xFF)) + { + forceMemStream = true; + } + } + fclose(peek); + } + } +#endif + + if ((!memStreamRequired) && (!forceMemStream) && + (size > MAX_IN_MEMORY_FILE_SIZE || size == 0)) return mio_new_file (fileName, openMode); src = fopen (fileName, openMode); @@ -900,6 +925,62 @@ static MIO *getMioFull (const char *const fileName, const char *const openMode, return mio_new_file (fileName, openMode); } fclose (src); + +#ifdef HAVE_ICONV + /* Check for UTF-16 BOM and convert to UTF-8 if found */ + if (size >= 2) + { + unsigned char *converted_data = NULL; + unsigned long converted_size = 0; + const char *encoding = NULL; + + /* Check for UTF-16 LE BOM (FF FE) */ + if (data[0] == 0xFF && data[1] == 0xFE) + { + encoding = "UTF-16LE"; + } + /* Check for UTF-16 BE BOM (FE FF) */ + else if (data[0] == 0xFE && data[1] == 0xFF) + { + encoding = "UTF-16BE"; + } + + if (encoding != NULL) + { + /* Convert UTF-16 to UTF-8 */ + iconv_t cd = iconv_open("UTF-8", encoding); + if (cd != (iconv_t)-1) + { + /* Skip BOM in input */ + char *inbuf = (char*)(data + 2); + size_t inbytesleft = size - 2; + + /* Allocate output buffer (UTF-8 can be up to 4 bytes per character) */ + size_t outbufsize = inbytesleft * 2; + converted_data = eMalloc(outbufsize); + char *outbuf = (char*)converted_data; + size_t outbytesleft = outbufsize; + + if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) != (size_t)-1) + { + converted_size = outbufsize - outbytesleft; + eFree(data); + data = 
converted_data; + size = converted_size; + converted_data = NULL; /* Prevent double free */ + } + else + { + /* Conversion failed, fall back to original data */ + if (converted_data) + eFree(converted_data); + } + iconv_close(cd); + } + } + } +#endif + return mio_new_memory (data, size, eRealloc, eFreeNoNullCheck); }