diff --git a/build/sphinx/mongoc_common.py b/build/sphinx/mongoc_common.py index 6ddf87c7fdf..f46a805d220 100644 --- a/build/sphinx/mongoc_common.py +++ b/build/sphinx/mongoc_common.py @@ -8,6 +8,7 @@ from sphinx.application import Sphinx from sphinx.application import logger as sphinx_log + try: from sphinx.builders.dirhtml import DirectoryHTMLBuilder except ImportError: @@ -16,7 +17,8 @@ from sphinx.config import Config from docutils.parsers.rst import Directive -needs_sphinx = "1.7" # Do not require newer sphinx. EPEL packages build man pages with Sphinx 1.7.6. Refer: CDRIVER-4767 +# Do not require newer sphinx. EPEL packages build man pages with Sphinx 1.7.6. Refer: CDRIVER-4767 +needs_sphinx = "1.7" author = "MongoDB, Inc" # -- Options for HTML output ---------------------------------------------- @@ -38,7 +40,8 @@ def _file_man_page_name(fpath: Path) -> Union[str, None]: continue return mat[1] -def _collect_man (app: Sphinx): + +def _collect_man(app: Sphinx): # Note: 'app' is partially-formed, as this is called from the Sphinx.__init__ docdir = Path(app.srcdir) # Find everything: @@ -61,6 +64,7 @@ def _collect_man (app: Sphinx): assert docname, filepath man_pages.append((docname, man_name, "", [author], 3)) + # -- Options for manual page output --------------------------------------- # NOTE: This starts empty, but we populate it in `setup` in _collect_man() (see above) @@ -168,6 +172,7 @@ def generate_html_redirs(app: Sphinx, page: str, templatename: str, context: Dic builder.css_files[:] = prev_css sphinx_log.debug("Wrote redirect: %r -> %r", path, page) + def mongoc_common_setup(app: Sphinx): _collect_man(app) app.connect("html-page-context", generate_html_redirs) diff --git a/src/common/src/mlib/intencode.h b/src/common/src/mlib/intencode.h index ebd04f88909..3f582389334 100644 --- a/src/common/src/mlib/intencode.h +++ b/src/common/src/mlib/intencode.h @@ -21,7 +21,10 @@ #include #include +#include +#include +#include #include #include @@ -165,3 +168,184 
@@ mlib_write_f64le (void *out, double d) memcpy (&bits, &d, sizeof d); return mlib_write_u64le (out, bits); } + +/** + * @brief Decode a 64-bit natural number + * + * @param in The input string to be decoded. Does not support a sign or base prefix! + * @param base The base to be decoded. Must not be zero! + * @param out Pointer that receives the decoded value + * @return int A result code for the operation. + * + * See `mlib_i64_parse` for more details. + */ +static inline int +mlib_nat64_parse (mstr_view in, int base, uint64_t *out) +{ + if (in.len == 0) { + // Empty string is not valid + return EINVAL; + } + + + // Accumulate into this value: + uint64_t value = 0; + // Whether any operation in the parse overflowed the integer value + bool did_overflow = false; + // Loop until we have consumed the full string, or encounter an invalid digit + while (in.len) { + // Shift place value for another digit + did_overflow = mlib_mul (&value, base) || did_overflow; + // Case-fold for alpha digits + int32_t digit = mlib_latin_tolower (in.data[0]); + int digit_value = 0; + // Only standard digits + if (digit >= '0' && digit <= '9') { + // Normal digit + digit_value = digit - '0'; + } else if (digit >= 'a' && digit <= 'z') { + // Letter digits + digit_value = (digit - 'a') + 10; + } else { + // Not a valid alnum digit + return EINVAL; + } + if (digit_value >= base) { + // The digit value is out-of-range for our chosen base + return EINVAL; + } + // Accumulate the new digit value + did_overflow = mlib_add (&value, digit_value) || did_overflow; + // Jump to the next digit in the string + in = mstr_substr (in, 1); + } + + if (did_overflow) { + return ERANGE; + } + + (void) (out && (*out = value)); + return 0; +} + +/** + * @brief Parse a string as a 64-bit signed integer + * + * @param in The string of digits to be parsed. + * @param base Optional: The base to use for parsing. Use "0" to infer the base. 
+ * @param out Optional storage for an int64 value to be updated with the result + * @return int Returns an errno value for the parse + * + * - A value of `0` indicates that the parse was successful. + * - A value of `EINVAL` indicates that the input string is not a valid + * representation of an integer. + * - A value of `ERANGE` indicates thath the input string is a valid integer, + * but the actual encoded value cannot be represented in an `int64_t` + * - If the parse fails (returns non-zero), then the value at `*out` will remain + * unmodified. + * + * This differs from `strtoll` in that it requires that the entire string be + * parsed as a valid integer. If parsing stops early, then the result will indicate + * an error of EINVAL. + */ +static inline int +mlib_i64_parse (mstr_view in, int base, int64_t *out) +{ + if (in.len == 0) { + // Empty string is not a valid integer + return EINVAL; + } + // Parse the possible sign prefix + int sign = 1; + // Check for a "+" + if (in.data[0] == '+') { + // Just a plus. Drop it and do nothing with it. + in = mstr_substr (in, 1); + } + // Check for a negative prefix + else if (in.data[0] == '-') { + // Negative sign. We'll negate the value later. + in = mstr_substr (in, 1); + sign = -1; + } + + // Infer the base value, if we have one + if (base == 0) { + if (in.data[0] == '0') { + if (in.len > 1) { + if (mlib_latin_tolower (in.data[1]) == 'x') { + // Hexadecimal + base = 16; + in = mstr_substr (in, 2); + } else if (mlib_latin_tolower (in.data[1]) == 'o') { + // Octal + base = 8; + in = mstr_substr (in, 2); + } else if (mlib_latin_tolower (in.data[1]) == 'b') { + // Binary + base = 2; + in = mstr_substr (in, 2); + } + } + if (base == 0) { + // Other: Octal with a single "0" prefix. Don't trim this, because + // it may be a literal "0" + base = 8; + } + } else { + // No '0' prefix. 
Treat it as decimal + base = 10; + } + } + + // Try to parse the natural number now that we have removed all prefixes and + // have a non-zero base. + uint64_t nat; + int rc = mlib_nat64_parse (in, base, &nat); + if (rc) { + return rc; + } + + // Try to narrow from the u64 to i64 and apply the sign. This must be done as + // one operation because of the pathological case of parsing INT64_MIN + int64_t i64 = 0; + if (mlib_mul (&i64, nat, sign)) { + return ERANGE; + } + + (void) (out && (*out = i64)); + return 0; +} + +#define mlib_i64_parse(...) MLIB_ARGC_PICK (_mlib_i64_parse, __VA_ARGS__) +#define _mlib_i64_parse_argc_2(S, Ptr) _mlib_i64_parse_argc_3 ((S), 0, (Ptr)) +#define _mlib_i64_parse_argc_3(S, Base, Ptr) mlib_i64_parse (mstr_view_from ((S)), Base, Ptr) + +/** + * @brief Parse a 32-bit integer from a string. + * + * See `mlib_i64_parse` for more details. + */ +static inline int +mlib_i32_parse (mstr_view in, int base, int32_t *out) +{ + int64_t tmp; + int ec = mlib_i64_parse (in, base, &tmp); + if (ec) { + // Failed to parse the int64 value. + return ec; + } + // Attempt to narrow to a 32-bit value + int32_t i32 = 0; + if (mlib_narrow (&i32, tmp)) { + // Value is out-of-range + return ERANGE; + } + // Success + (void) (out && (*out = i32)); + return 0; +} + +#define mlib_i32_parse(...) MLIB_ARGC_PICK (_mlib_i32_parse, __VA_ARGS__) +#define _mlib_i32_parse_argc_2(S, Ptr) _mlib_i32_parse_argc_3 ((S), 0, (Ptr)) +#define _mlib_i32_parse_argc_3(S, Base, Ptr) mlib_i32_parse (mstr_view_from ((S)), Base, Ptr) diff --git a/src/common/src/mlib/str.h b/src/common/src/mlib/str.h new file mode 100644 index 00000000000..2c65ef5b964 --- /dev/null +++ b/src/common/src/mlib/str.h @@ -0,0 +1,554 @@ +/** + * @file mlib/str.h + * @brief String handling utilities + * @date 2025-04-30 + * + * This file provides utilities for handling *sized* strings. That is, strings + * that carry their size, and do not rely on null termination. 
These APIs also + * do a lot more bounds checking than is found in `<string.h>`. + * + * @copyright Copyright 2009-present MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MLIB_STR_H_INCLUDED +#define MLIB_STR_H_INCLUDED + +#include +#include +#include +#include +#include +#include + +#include +#include + +/** + * @brief A simple non-owning string-view type. + * + * The viewed string can be treated as an array of `char`. Its pointed-to data + * must not be freed or manipulated. + * + * @note The viewed string is NOT guaranteed to be null-terminated. It WILL + * be null-terminated if: Directly created from a string literal, a C string, or + * a null-terminated `mstr_view`. + * @note The viewed string MAY contain nul (zero-value) characters, so using them + * with C string APIs could truncate unexpectedly. + */ +typedef struct mstr_view { + // Pointer to the first character in the string + const char *data; + // Length of the array pointed-to by `data` + size_t len; +} mstr_view; + +/** + * @brief Expand to the two printf format arguments required to format an mstr object + * + * You should use the format specifier `%.*s` for all mstr strings. + * + * This is just a convenience shorthand. 
+ */ +#define MSTR_FMT(S) (int) mstr_view_from (S).len, mstr_view_from (S).data + +/** + * @brief Create an `mstr_view` that views the given array of `char` + * + * @param data Pointer to the beginning of the character array + * @param len Length of the new string-view + */ +static inline mstr_view +mstr_view_data (const char *data, size_t len) +{ + mstr_view ret; + ret.data = data; + ret.len = len; + return ret; +} + +#if 1 // See "!! NOTE" below + +/** + * @brief Coerce a string-like object to an `mstr_view` of that string + * + * This macro requires that the object have `.data` and `.len` members + */ +#define mstr_view_from(X) mstr_view_data ((X).data, (X).len) + +/** + * ! NOTE: The disabled snippet below is kept for posterity as a drop-in replacement + * ! for mstr_view_from with support for _Generic. + * + * When we can increase the compiler requirements to support _Generic, the following + * macro definition alone makes almost every function in this file significantly + * more concise to use, as it allows us to pass a C string to any API that + * expects an `mstr_view`, enabling code like this: + * + * ``` + * mstr s = get_string(); + * if (mstr_cmp(s, ==, "magicKeyword")) { + * Do something... + * } + * ``` + * + * Without _Generic, we require all C strings to be wrapped with `mstr_cstring`, + * which isn't especially onerous, but it is annoying. Additionally, the below + * `_Generic` macro can be extended to support more complex string-like types. 
+ * + * For reference, support for _Generic requires the following compilers: + * + * - MSVC 19.28.0+ (VS 2019, 16.8.1) + * - GCC 4.9+ + * - Clang 3.0+ + */ + +#else + +/** + * @brief Coerce an object to an `mstr_view` + * + * The object must be an `mstr_view`, a `char *`, or a `const char *` + */ +#define mstr_view_from(X) \ + _Generic ((X), mstr_view: _mstr_view_trivial_copy, char *: mstr_cstring, const char *: mstr_cstring) ((X)) +// Just copy an mstr_view by-value +static inline mstr_view +_mstr_view_trivial_copy (mstr_view s) +{ + return s; +} + +#endif + + +/** + * @brief Create an `mstr_view` referring to the given null-terminated C string + * + * @param s Pointer to a C string. The length of the returned string is inferred using `strlen` + */ +static inline mstr_view +mstr_cstring (const char *s) +{ + const size_t l = strlen (s); + return mstr_view_data (s, l); +} + +/** + * @brief Compare two strings lexicographically by each code unit + * + * If called with two arguments behaves the same as `strcmp`. If called with + * three arguments, the center argument should be an infix operator to perform + * the semantic comparison. + */ +static inline enum mlib_cmp_result +mstr_cmp (mstr_view a, mstr_view b) +{ + size_t l = a.len; + if (b.len < l) { + l = b.len; + } + // Use `memcmp`, not `strncmp`: We want to respect nul characters + int r = memcmp (a.data, b.data, l); + if (r) { + // Not equal: Compare with zero to normalize to the cmp_result value + return mlib_cmp (r, 0); + } + // Same prefixes, the ordering is now based on their length (longer string > shorter string) + return mlib_cmp (a.len, b.len); +} + +#define mstr_cmp(...) MLIB_ARGC_PICK (_mstr_cmp, __VA_ARGS__) +#define _mstr_cmp_argc_2(A, B) mstr_cmp (mstr_view_from (A), mstr_view_from (B)) +#define _mstr_cmp_argc_3(A, Op, B) (_mstr_cmp_argc_2 (A, B) Op 0) + +/** + * @brief If the given codepoint is a Basic Latin (ASCII) uppercase character, + * return the lowercase character. Other codepoint values are returned unchanged. 
+ * + * This is safer than `tolower`, because it doesn't respect locale and has no + * undefined behavior. + */ +static inline int32_t +mlib_latin_tolower (int32_t a) +{ + if (a >= 0x41 /* "A" */ && a <= 0x5a /* "Z" */) { + a += 0x20; // Adjust from "A" -> "a" + } + return a; +} + +/** + * @brief Compare two individual codepoint values, with case-insensitivity in + * the Basic Latin range. + */ +static inline enum mlib_cmp_result +mlib_latin_charcasecmp (int32_t a, int32_t b) +{ + return mlib_cmp (mlib_latin_tolower (a), mlib_latin_tolower (b)); +} + +/** + * @brief Compare two strings lexicographically, case-insensitive in the Basic + * Latin range. + * + * If called with two arguments, behaves the same as `strcasecmp`. If called with + * three arguments, the center argument should be an infix operator to perform + * the semantic comparison. + */ +static inline enum mlib_cmp_result +mstr_latin_casecmp (mstr_view a, mstr_view b) +{ + size_t l = a.len; + if (b.len < l) { + l = b.len; + } + mlib_foreach_urange (i, l) { + // We don't need to do any UTF-8 decoding, because our case insensitivity + // only activates for 1-byte encoded codepoints, and all other valid UTF-8 + // sequences will collate equivalently with byte-wise comparison to a UTF-32 + // encoding. + enum mlib_cmp_result r = mlib_latin_charcasecmp (a.data[i], b.data[i]); + if (r) { + // Not equivalent at this code unit. Return this as the overall string ordering. + return r; + } + } + // Same prefixes, the ordering is now based on their length (longer string > shorter string) + return mlib_cmp (a.len, b.len); +} + +#define mstr_latin_casecmp(...) 
MLIB_ARGC_PICK (_mstr_latin_casecmp, __VA_ARGS__) +#define _mstr_latin_casecmp_argc_2(A, B) mstr_latin_casecmp (mstr_view_from (A), mstr_view_from (B)) +#define _mstr_latin_casecmp_argc_3(A, Op, B) (_mstr_latin_casecmp_argc_2 (A, B) Op 0) + +/** + * @brief Adjust a possibly negative index position to wrap around for a string + * + * @param s The string to be respected for index wrapping + * @param pos The maybe-negative index to be adjusted + * @param clamp_to_length If `true` and given a non-negative value, if that + * value is greater than the string length, this function will return the string + * length instead. + * @return size_t The new zero-based non-negative index + * + * If `pos` is negative, then it represents indexing from the end of the string, + * where `-1` refers to the last character in the string, `-2` the penultimate, + * etc. If the absolute value is greater than the length of the string, the + * program will be terminated. + */ +static inline size_t +_mstr_adjust_index (mstr_view s, mlib_upsized_integer pos, bool clamp_to_length) +{ + if (clamp_to_length) { + if (pos.is_signed) { + if (pos.i.s > 0 && (uintmax_t) pos.i.s > s.len) { + pos.i.s = (intmax_t) s.len; + } + } else { + if (pos.i.u > s.len) { + pos.i.u = s.len; + } + } + } + if (pos.is_signed) { + if (pos.i.s < 0) { + // This will add the negative value to the length of the string. If such + // an operation would result in a negative value, this will terminate the + // program. + return mlib_assert_add (size_t, s.len, pos.i.s); + } + } + mlib_check (pos.i.u <= s.len, because, "the string position index must not be larger than the string length"); + return pos.i.u; +} + +/** + * @brief Create a new `mstr_view` that views a substring within another string + * + * @param s The original string view to be inspected + * @param pos The number of `char` to skip in `s`, or a negative value to + * index from the end of the string. 
+ * @param len The length of the new string view (optional, default SIZE_MAX) + * + * The length of the string view is clamped to the characters available in `s`, + * so passing a too-large value for `len` is well-defined. Passing a too-large + * value for `pos` will abort the program. + * + * Callable as: + * + * - `mstr_substr(s, pos)` + * - `mstr_substr(s, pos, len)` + */ +static inline mstr_view +mstr_substr (mstr_view s, mlib_upsized_integer pos_, size_t len) +{ + const size_t pos = _mstr_adjust_index (s, pos_, false); + // Number of characters in the string after we remove the prefix + const size_t remain = s.len - pos; + // Clamp the new length to the size that is actually available. + if (len > remain) { + len = remain; + } + return mstr_view_data (s.data + pos, len); +} + +#define mstr_substr(...) MLIB_ARGC_PICK (_mstr_substr, __VA_ARGS__) +#define _mstr_substr_argc_2(Str, Start) _mstr_substr_argc_3 (Str, Start, SIZE_MAX) +#define _mstr_substr_argc_3(Str, Start, Stop) mstr_substr (mstr_view_from (Str), mlib_upsize_integer (Start), Stop) + +/** + * @brief Obtain a slice of the given string view, where the two arguments are zero-based indices into the string + * + * @param s The string to be sliced + * @param start The zero-based index of the new string start + * @param end The zero-based index of the first character to exclude from the new string + * + * @note Unlike `substr`, the second argument is required, and must specify the index at which the + * string will end, rather than the length of the string. 
+ */ +static inline mstr_view +mstr_slice (const mstr_view s, const mlib_upsized_integer start_, const mlib_upsized_integer end_) +{ + const size_t start_pos = _mstr_adjust_index (s, start_, false); + const size_t end_pos = _mstr_adjust_index (s, end_, true); + mlib_check (end_pos >= start_pos, because, "Slice positions must end after the start position"); + const size_t sz = (size_t) (end_pos - start_pos); + return mstr_substr (s, start_pos, sz); +} +#define mstr_slice(S, StartPos, EndPos) \ + mstr_slice (mstr_view_from (S), mlib_upsize_integer ((StartPos)), mlib_upsize_integer ((EndPos))) + +/** + * @brief Find the first occurrence of `needle` within `hay`, returning the zero-based index + * if found, and `SIZE_MAX` if it is not found. + * + * @param hay The string which is being scanned + * @param needle The substring that we are searching to find + * @param pos The start position of the search (optional, default zero) + * @param len The number of characters to search in `hay` (optional, default SIZE_MAX) + * @return size_t If found, the zero-based index of the first occurrence within + * the string. If not found, returns `SIZE_MAX`. + * + * The `len` is clamped to the available string length. + * + * Callable as: + * + * - `mstr_find(hay, needle)` + * - `mstr_find(hay, needle, pos)` + * - `mstr_find(hay, needle, pos, len)` + */ +static inline size_t +mstr_find (mstr_view hay, mstr_view const needle, mlib_upsized_integer const pos_, size_t const len) +{ + const size_t pos = _mstr_adjust_index (hay, pos_, false); + // Trim the hay according to our search window: + hay = mstr_substr (hay, pos, len); + + // Larger needle can never exist within the smaller string: + if (hay.len < needle.len) { + return SIZE_MAX; + } + + // Set the index at which we can stop searching early. 
This will never + // wrap around, because we return early above when hay.len < needle.len + size_t stop_idx = hay.len - needle.len; + // Use "<=", because we do want to include the final search position + for (size_t offset = 0; offset <= stop_idx; ++offset) { + if (memcmp (hay.data + offset, needle.data, needle.len) == 0) { + // Return the found position. Adjust by the start pos since we may + // have trimmed the search window + return offset + pos; + } + } + + // Nothing was found. Return SIZE_MAX to indicate the not-found + return SIZE_MAX; +} + +#define mstr_find(...) MLIB_ARGC_PICK (_mstr_find, __VA_ARGS__) +#define _mstr_find_argc_2(Hay, Needle) _mstr_find_argc_3 (Hay, Needle, 0) +#define _mstr_find_argc_3(Hay, Needle, Start) _mstr_find_argc_4 (Hay, Needle, Start, SIZE_MAX) +#define _mstr_find_argc_4(Hay, Needle, Start, Stop) \ + mstr_find (mstr_view_from (Hay), mstr_view_from (Needle), mlib_upsize_integer (Start), Stop) + +/** + * @brief Find the zero-based index of the first `char` in `hay` that also occurs in `needles` + * + * This is different from `find()` because it considers each char in `needles` as an individual + * one-character string to be searched for in `hay`. + * + * @param hay The string to be searched + * @param needles A string containing a set of characters which are searched for in `hay` + * @param pos The index at which to begin searching (optional, default is zero) + * @param len The number of characters in `hay` to consider before stopping (optional, default is SIZE_MAX) + * @return size_t If a needle is found, returns the zero-based index of that first needle. 
+ * Otherwise, returns SIZE_MAX + * + * Callable as: + * + * - `mstr_find_first_of(hay, needles)` + * - `mstr_find_first_of(hay, needles, pos)` + * - `mstr_find_first_of(hay, needles, pos, len)` + */ +static inline size_t +mstr_find_first_of (mstr_view hay, mstr_view const needles, mlib_upsized_integer const pos_, size_t const len) +{ + const size_t pos = _mstr_adjust_index (hay, pos_, false); + // Trim to fit the search window + hay = mstr_substr (hay, pos, len); + // We search by incrementing an index + mlib_foreach_urange (idx, hay.len) { + // Grab a substring of the single char at the current search index + mstr_view one = mstr_substr (hay, idx, 1); + // Test if the single char occurs anywhere in the needle set + if (mstr_find (needles, one) != SIZE_MAX) { + // We found the first index in `hay` where one of the needles occurs. Adjust + // by `pos` since we may have trimmed + return idx + pos; + } + } + return SIZE_MAX; +} + +#define mstr_find_first_of(...) MLIB_ARGC_PICK (_mstr_find_first_of, __VA_ARGS__) +#define _mstr_find_first_of_argc_2(Hay, Needle) _mstr_find_first_of_argc_3 (Hay, Needle, 0) +#define _mstr_find_first_of_argc_3(Hay, Needle, Pos) _mstr_find_first_of_argc_4 (Hay, Needle, Pos, SIZE_MAX) +#define _mstr_find_first_of_argc_4(Hay, Needle, Pos, Len) \ + mstr_find_first_of (mstr_view_from (Hay), mstr_view_from (Needle), mlib_upsize_integer (Pos), Len) + +/** + * @brief Split a single string view into two strings at the given position + * + * @param s The string to be split + * @param pos The position at which the prefix string is ended + * @param drop [optional] The number of characters to drop between the prefix and suffix + * @param prefix [out] Updated to point to the part of the string before the split + * @param suffix [out] Updated to point to the part of the string after the split + * + * `pos` and `drop` are clamped to the size of the input string. 
+ * + * Callable as: + * + * - `mstr_split_at(s, pos, prefix, suffix)` + * - `mstr_split_at(s, pos, drop, prefix, suffix)` + * + * If either `prefix` or `suffix` is a null pointer, then they will be ignored + */ +static inline void +mstr_split_at (mstr_view s, mlib_upsized_integer pos_, size_t drop, mstr_view *prefix, mstr_view *suffix) +{ + const size_t pos = _mstr_adjust_index (s, pos_, true /* clamp to the string size */); + // Save the prefix string + if (prefix) { + *prefix = mstr_substr (s, 0, pos); + } + // Save the suffix string + if (suffix) { + // The number of characters that remain after the prefix is removed + const size_t remain = s.len - pos; + // Clamp the number of chars to drop to not overrun the input string + if (remain < drop) { + drop = remain; + } + // The start position of the new string + const size_t next_start = pos + drop; + *suffix = mstr_substr (s, next_start, SIZE_MAX); + } +} + +#define mstr_split_at(...) MLIB_ARGC_PICK (_mstr_split_at, __VA_ARGS__) +#define _mstr_split_at_argc_4(Str, Pos, Prefix, Suffix) _mstr_split_at_argc_5 (Str, Pos, 0, Prefix, Suffix) +#define _mstr_split_at_argc_5(Str, Pos, Drop, Prefix, Suffix) \ + mstr_split_at (mstr_view_from (Str), mlib_upsize_integer (Pos), Drop, Prefix, Suffix) + +/** + * @brief Split a string in two around the first occurrence of some infix string. + * + * @param s The string to be split in twain + * @param infix The infix string to be searched for + * @param prefix The part of the string that precedes the infix (nullable) + * @param suffix The part of the string that follows the infix (nullable) + * @return true If the infix was found + * @return false Otherwise + * + * @note If `infix` does not occur in `s`, then `*prefix` will be set equal to `s`, + * and `*suffix` will be made an empty string, as if the infix occurred at the end + * of the string. 
+ */ +static inline bool +mstr_split_around (mstr_view s, mstr_view infix, mstr_view *prefix, mstr_view *suffix) +{ + // Find the position of the infix. If it is not found, returns SIZE_MAX + const size_t pos = mstr_find (s, infix); + // Split at the infix, dropping as many characters as are in the infix. If + // the `pos` is SIZE_MAX, then this call will clamp to the end of the string. + mstr_split_at (s, pos, infix.len, prefix, suffix); + // Return `true` if we found the infix, indicated by a not-SIZE_MAX `pos` + return pos != SIZE_MAX; +} + +#define mstr_split_around(Str, Infix, PrefixPtr, SuffixPtr) \ + mstr_split_around (mstr_view_from ((Str)), mstr_view_from ((Infix)), (PrefixPtr), (SuffixPtr)) + +/** + * @brief Test whether the given string starts with the given prefix + * + * @param str The string to be tested + * @param prefix The prefix to be searched for + * @return true if-and-only-if `str` starts with `prefix` + * @return false Otherwise + */ +static inline bool +mstr_starts_with (mstr_view str, mstr_view prefix) +{ + // Trim to match the length of the prefix we want + str = mstr_substr (str, 0, prefix.len); + // Check if the trimmed string is the same as the prefix + return mstr_cmp (str, ==, prefix); +} +#define mstr_starts_with(Str, Prefix) mstr_starts_with (mstr_view_from (Str), mstr_view_from (Prefix)) + +/** + * @brief Test whether a substring occurs at any point within the given string + * + * @param str The string to be inspected + * @param needle The substring to be searched for + * @return true If-and-only-if `str` contains `needle` at any position + * @return false Otherwise + */ +static inline bool +mstr_contains (mstr_view str, mstr_view needle) +{ + return mstr_find (str, needle) != SIZE_MAX; +} +#define mstr_contains(Str, Needle) mstr_contains (mstr_view_from (Str), mstr_view_from (Needle)) + +/** + * @brief Test whether a given string contains any of the characters in some other string + * + * @param str The string to be inspected + * 
@param needle A string to be treated as a set of one-byte characters to search for + * @return true If-and-only-if `str` contains `needle` at any position + * @return false Otherise + * + * @note This function does not currently support multi-byte codepoints + */ +static inline bool +mstr_contains_any_of (mstr_view str, mstr_view needle) +{ + return mstr_find_first_of (str, needle) != SIZE_MAX; +} +#define mstr_contains_any_of(Str, Needle) mstr_contains_any_of (mstr_view_from (Str), mstr_view_from (Needle)) + +#endif // MLIB_STR_H_INCLUDED diff --git a/src/common/src/mlib/test.h b/src/common/src/mlib/test.h index 5fbf1ca8215..346ba16767e 100644 --- a/src/common/src/mlib/test.h +++ b/src/common/src/mlib/test.h @@ -145,6 +145,9 @@ typedef struct mlib_source_location { #define _mlibCheckCondition_neq(A, B) \ _mlibCheckIntCmp ( \ mlib_equal, false, "!=", mlib_upsize_integer (A), mlib_upsize_integer (B), #A, #B, mlib_this_source_location ()) +// Simple assertion with an explanatory string +#define _mlibCheckCondition_because(Cond, Msg) \ + _mlibCheckConditionBecause (Cond, #Cond, Msg, mlib_this_source_location ()) /// Check evaluator when given a single boolean static inline void @@ -157,6 +160,17 @@ _mlibCheckConditionSimple (bool c, const char *expr, struct mlib_source_location } } +static inline void +_mlibCheckConditionBecause (bool cond, const char *expr, const char *reason, mlib_source_location here) +{ + if (!cond) { + fprintf ( + stderr, "%s:%d: in [%s]: Check condition ⟨%s⟩ failed (%s)\n", here.file, here.lineno, here.func, expr, reason); + fflush (stderr); + abort (); + } +} + // Implement integer comparison checks static inline void _mlibCheckIntCmp (enum mlib_cmp_result cres, // The cmp result to check diff --git a/src/common/tests/test-mlib.c b/src/common/tests/test-mlib.c index 163c3ca28ba..d049e222918 100644 --- a/src/common/tests/test-mlib.c +++ b/src/common/tests/test-mlib.c @@ -1,5 +1,6 @@ #include "TestSuite.h" +#include #include #include #include @@ 
-44,6 +45,11 @@ _test_checks (void) mlib_assert_aborts () { mlib_check (1, neq, 1); } + // "because" string + mlib_check (true, because, "just true"); + mlib_assert_aborts () { + mlib_check (false, because, "this will fail"); + } } static void @@ -448,6 +454,55 @@ _test_int_encoding (void) } } +static void +_test_int_parse (void) +{ + const int64_t bogus_value = 2424242424242424242; + struct case_ { + const char *in; + int64_t value; + int ec; + } cases[] = { + // Basics: + {"0", 0}, + {"1", 1}, + {"+1", 1}, + {"-1", -1}, + // Differences from strtoll + // We require at least one digit immediately + {"a1", bogus_value, EINVAL}, + {"", bogus_value, EINVAL}, + // No space skipping + {" 1", bogus_value, EINVAL}, + {" +42", bogus_value, EINVAL}, + // No trailing characters + {"123a", bogus_value, EINVAL}, + // strtoll: Set ERANGE if the value is too large + {"123456789123456789123", bogus_value, ERANGE}, + // Difference: We generate EINVAL if its not an integer, even if strtoll says ERANGE + {"123456789123456789123abc", bogus_value, EINVAL}, + // Truncated prefix + {"+", bogus_value, EINVAL}, + {"+0x", bogus_value, EINVAL}, + {"0x", bogus_value, EINVAL}, + {"-0b", bogus_value, EINVAL}, + {"0xff", 0xff}, + {"0xfr", bogus_value, EINVAL}, + {"0x0", 0}, + {"0o755", 0755}, + {"0755", 0755}, + // Boundary cases: + {"9223372036854775807", INT64_MAX}, + {"-9223372036854775808", INT64_MIN}, + }; + mlib_foreach_arr (struct case_, test, cases) { + int64_t value = bogus_value; + int ec = mlib_i64_parse (mstr_cstring (test->in), &value); + mlib_check (value, eq, test->value); + mlib_check (ec, eq, test->ec); + } +} + static void _test_foreach (void) { @@ -658,6 +713,148 @@ _test_ckdint_partial (void) } } +static void +_test_str_view (void) +{ + mstr_view sv = mstr_cstring ("Hello, world!"); + mlib_check (sv.data, str_eq, "Hello, world!"); + + mlib_check (mstr_cmp (sv, ==, mstr_cstring ("Hello, world!"))); + mlib_check (mstr_cmp (sv, >, mstr_cstring ("Hello"))); + // Longer strings 
are greater than shorter strings + mlib_check (mstr_cmp (sv, <, mstr_cstring ("ZZZZZ"))); + // str_view_from duplicates a string view: + mlib_check (mstr_cmp (sv, ==, mstr_view_from (sv))); + + // Substring + { + sv = mstr_cstring ("foobar"); + // Implicit length includes everything: + mlib_check (mstr_cmp (mstr_substr (sv, 2), ==, mstr_cstring ("obar"))); + // Explicit length trims: + mlib_check (mstr_cmp (mstr_substr (sv, 2, 1), ==, mstr_cstring ("o"))); + // Substring over the whole length: + mlib_check (mstr_cmp (mstr_substr (sv, sv.len), ==, mstr_cstring (""))); + } + + // Substring from end + { + sv = mstr_cstring ("foobar"); + mlib_check (mstr_cmp (mstr_substr (sv, -3), ==, mstr_cstring ("bar"))); + mlib_check (mstr_cmp (mstr_substr (sv, -6), ==, mstr_cstring ("foobar"))); + } + + // Searching forward: + { + sv = mstr_cstring ("foobar"); + mlib_check (mstr_find (sv, mstr_cstring ("foo")), eq, 0); + mlib_check (mstr_find (sv, mstr_cstring ("o")), eq, 1); + mlib_check (mstr_find (sv, mstr_cstring ("foof")), eq, SIZE_MAX); + mlib_check (mstr_find (sv, mstr_cstring ("bar")), eq, 3); + mlib_check (mstr_find (sv, mstr_cstring ("barf")), eq, SIZE_MAX); + // Start at index 3 + mlib_check (mstr_find (sv, mstr_cstring ("bar"), 3), eq, 3); + // Starting beyond the occurrence will fail: + mlib_check (mstr_find (sv, mstr_cstring ("b"), 4), eq, SIZE_MAX); + // Empty string is found immediately: + mlib_check (mstr_find (sv, mstr_cstring ("")), eq, 0); + } + + { + // Searching for certain chars + mstr_view digits = mstr_cstring ("1234567890"); + // The needle chars never occur, so returns SIZE_MAX + mlib_check (mstr_find_first_of (mstr_cstring ("foobar"), digits), eq, SIZE_MAX); + // `1` at the fourth pos + mlib_check (mstr_find_first_of (mstr_cstring ("foo1barbaz4"), digits), eq, 3); + // `1` at the fourth pos, with a trimmed window: + mlib_check (mstr_find_first_of (mstr_cstring ("foo1barbaz4"), digits, 3), eq, 3); + // `4` is found, since we drop the `1` from the window:
+ mlib_check (mstr_find_first_of (mstr_cstring ("foo1barbaz4"), digits, 4), eq, 10); + // Empty needles string is never found in any string + mlib_check (mstr_find_first_of (mstr_cstring ("foo bar baz"), mstr_cstring ("")), eq, SIZE_MAX); + // Find at the end of the string + mlib_check (mstr_find_first_of (mstr_cstring ("foo bar baz"), mstr_cstring ("z")), eq, 10); + } + + // Splitting + { + sv = mstr_cstring ("foo bar baz"); + mstr_view a, b; + // Trim at index 3, drop one char: + mstr_split_at (sv, 3, 1, &a, &b); + mlib_check (mstr_cmp (a, ==, mstr_cstring ("foo"))); + mlib_check (mstr_cmp (b, ==, mstr_cstring ("bar baz"))); + // Trim at index 3, default drop=0: + mstr_split_at (sv, 3, &a, &b); + mlib_check (mstr_cmp (a, ==, mstr_cstring ("foo"))); + mlib_check (mstr_cmp (b, ==, mstr_cstring (" bar baz"))); + // Trim past-the-end + mstr_split_at (sv, 5000, &a, &b); + mlib_check (mstr_cmp (a, ==, mstr_cstring ("foo bar baz"))); + mlib_check (mstr_cmp (b, ==, mstr_cstring (""))); + // Drop too many: + mstr_split_at (sv, 0, 5000, &a, &b); + mlib_check (mstr_cmp (a, ==, mstr_cstring (""))); + mlib_check (mstr_cmp (b, ==, mstr_cstring (""))); + // Past-the-end and also drop + mstr_split_at (sv, 4000, 42, &a, &b); + mlib_check (mstr_cmp (a, ==, mstr_cstring ("foo bar baz"))); + mlib_check (mstr_cmp (b, ==, mstr_cstring (""))); + + // Split using a negative index + mstr_split_at (sv, -4, 1, &a, &b); + mlib_check (mstr_cmp (a, ==, mstr_cstring ("foo bar"))); + mlib_check (mstr_cmp (b, ==, mstr_cstring ("baz"))); + } + + // Splitting around an infix + { + sv = mstr_cstring ("foo bar baz"); + mstr_view a, b; + // Split around the first space + const mstr_view space = mstr_cstring (" "); + mlib_check (mstr_split_around (sv, space, &a, &b)); + mlib_check (mstr_cmp (a, ==, mstr_cstring ("foo"))); + mlib_check (mstr_cmp (b, ==, mstr_cstring ("bar baz"))); + // Split again + mlib_check (mstr_split_around (b, space, &a, &b)); + mlib_check (mstr_cmp (a, ==, mstr_cstring 
("bar"))); + mlib_check (mstr_cmp (b, ==, mstr_cstring ("baz"))); + // Split again. This won't find a space, but will still do something + mlib_check (!mstr_split_around (b, space, &a, &b)); + mlib_check (mstr_cmp (a, ==, mstr_cstring ("baz"))); + mlib_check (mstr_cmp (b, ==, mstr_cstring (""))); + // Splitting on the final empty string does nothing + mlib_check (!mstr_split_around (b, space, &a, &b)); + mlib_check (mstr_cmp (a, ==, mstr_cstring (""))); + mlib_check (mstr_cmp (b, ==, mstr_cstring (""))); + } + + // Case folding + { + mlib_check (mlib_latin_tolower ('a'), eq, 'a'); + mlib_check (mlib_latin_tolower ('z'), eq, 'z'); + mlib_check (mlib_latin_tolower ('A'), eq, 'a'); + mlib_check (mlib_latin_tolower ('Z'), eq, 'z'); + // Other chars are unchanged: + mlib_check (mlib_latin_tolower ('7'), eq, '7'); + mlib_check (mlib_latin_tolower ('?'), eq, '?'); + } + + // Case-insensitive compare + { + mlib_check (mstr_latin_casecmp (mstr_cstring ("foo"), ==, mstr_cstring ("foo"))); + mlib_check (mstr_latin_casecmp (mstr_cstring ("foo"), !=, mstr_cstring ("bar"))); + mlib_check (mstr_latin_casecmp (mstr_cstring ("Foo"), ==, mstr_cstring ("foo"))); + mlib_check (mstr_latin_casecmp (mstr_cstring ("Foo"), >, mstr_cstring ("bar"))); + // "Food" < "foo" when case-sensitive ('F' < 'f'): + mlib_check (mstr_cmp (mstr_cstring ("Food"), <, mstr_cstring ("foo"))); + // But "Food" > "foo" when case-insensitive: + mlib_check (mstr_latin_casecmp (mstr_cstring ("Food"), >, mstr_cstring ("foo"))); + } +} + void test_mlib_install (TestSuite *suite) { @@ -668,9 +865,11 @@ test_mlib_install (TestSuite *suite) TestSuite_Add (suite, "/mlib/in-range", _test_in_range); TestSuite_Add (suite, "/mlib/assert-aborts", _test_assert_aborts); TestSuite_Add (suite, "/mlib/int-encoding", _test_int_encoding); + TestSuite_Add (suite, "/mlib/int-parse", _test_int_parse); TestSuite_Add (suite, "/mlib/foreach", _test_foreach); TestSuite_Add (suite, "/mlib/check-cast", _test_cast); TestSuite_Add (suite,
"/mlib/ckdint-partial", _test_ckdint_partial); + TestSuite_Add (suite, "/mlib/str_view", _test_str_view); } mlib_diagnostic_pop (); diff --git a/src/libbson/doc/bson_error_clear.rst b/src/libbson/doc/bson_error_clear.rst new file mode 100644 index 00000000000..7fbb2a1271f --- /dev/null +++ b/src/libbson/doc/bson_error_clear.rst @@ -0,0 +1,24 @@ +:man_page: bson_error_clear + +bson_error_clear() +================== + +Synopsis +-------- + +.. code-block:: c + + void + bson_error_clear (bson_error_t *error); + +Parameters +---------- + +* ``error``: A pointer to storage for a :symbol:`bson_error_t`, or NULL. + +Description +----------- + +If given a non-null pointer to a :symbol:`bson_error_t`, this function will +clear any error value that is stored in the pointed-to object. If given a null +pointer, this function has no effect. diff --git a/src/libbson/doc/bson_error_reset.rst b/src/libbson/doc/bson_error_reset.rst new file mode 100644 index 00000000000..cba44202dd6 --- /dev/null +++ b/src/libbson/doc/bson_error_reset.rst @@ -0,0 +1,29 @@ +:man_page: bson_error_reset + +bson_error_reset() +================== + +Synopsis +-------- + +.. code-block:: c + + #define bson_error_reset(ErrorPointer) + +Parameters +---------- + +* ``ErrorPointer``: An l-value expression of type ``bson_error_t*``. May be a + null pointer. + +Description +----------- + +This function-like macro modifies a pointer to :symbol:`bson_error_t` to be +non-null, and clears any contained value using :symbol:`bson_error_clear`. + +If the given pointer object is null, then the pointer is updated to point to a +local anonymous :symbol:`bson_error_t` object. After the evaluation of this +macro, it is guaranteed that the given pointer is non-null. + +.. important:: This function-like macro is not valid in C++!
diff --git a/src/libbson/doc/bson_error_t.rst b/src/libbson/doc/bson_error_t.rst index 561d2e94d19..221e0a744ba 100644 --- a/src/libbson/doc/bson_error_t.rst +++ b/src/libbson/doc/bson_error_t.rst @@ -36,6 +36,8 @@ See `Handling Errors `_. bson_set_error bson_strerror_r + bson_error_clear + bson_error_reset Example ------- diff --git a/src/libbson/src/bson/bson-error.c b/src/libbson/src/bson/bson-error.c index acefa7b5958..82ae8993df8 100644 --- a/src/libbson/src/bson/bson-error.c +++ b/src/libbson/src/bson/bson-error.c @@ -77,7 +77,9 @@ bson_set_error (bson_error_t *error, /* OUT */ bson_set_error_category (error, BSON_ERROR_CATEGORY); va_start (args, format); - bson_vsnprintf (error->message, sizeof error->message, format, args); + char buffer[sizeof error->message] = {0}; + bson_vsnprintf (buffer, sizeof error->message, format, args); + memcpy (error->message, buffer, sizeof buffer); va_end (args); } } diff --git a/src/libbson/src/bson/bson-types.h b/src/libbson/src/bson/bson-types.h index 57dc64c5fbd..f0042a76ebc 100644 --- a/src/libbson/src/bson/bson-types.h +++ b/src/libbson/src/bson/bson-types.h @@ -482,6 +482,46 @@ typedef struct _bson_error_t { BSON_STATIC_ASSERT2 (error_t, sizeof (bson_error_t) == 512); +/** + * @brief Reset the content of a bson_error_t to indicate no error. + * + * @param error Pointer to an error to be overwritten. If null, this function + * has no effect. + * + * This is static-inline because it is trivially optimizable as a (conditional) + * `memset`. + */ +static inline void +bson_error_clear (bson_error_t *error) +{ + if (!error) { + return; + } + // Statically initialized to a zero struct: + static bson_error_t zero_error; + // Replace the caller's value: + *error = zero_error; +} + +/** + * @brief Given a `bson_error_t` pointer l-value, ensure that it is non-null, and clear any + * error value that it might hold. + * + * @param ErrorPointer An l-value expression of type `bson_error_t*`.
+ * + * If the passed pointer is null, then it will be updated to point to an anonymous + * `bson_error_t` object that lives in the caller's scope. + * + * @note This macro is not valid in C++ because it relies on C99 compound literal semantics + */ +#define bson_error_reset(ErrorPointer) bson_error_reset (&(ErrorPointer), &(bson_error_t) {0}) +static inline void (bson_error_reset) (bson_error_t **error, bson_error_t *localptr) +{ + if (*error == NULL) { + *error = localptr; + } + bson_error_clear (*error); +} /** * bson_next_power_of_two: diff --git a/src/libbson/tests/test-bson-error.c b/src/libbson/tests/test-bson-error.c index f30a20970dd..e4007f2af64 100644 --- a/src/libbson/tests/test-bson-error.c +++ b/src/libbson/tests/test-bson-error.c @@ -15,6 +15,7 @@ */ #include +#include #include "TestSuite.h" @@ -31,6 +32,36 @@ test_bson_error_basic (void) ASSERT_CMPUINT (error.reserved, ==, 1u); // BSON_ERROR_CATEGORY } +static void +test_bson_error_clear (void) +{ + bson_error_t err; + err.code = 42; + err.domain = 1729; + bson_error_clear (&err); + mlib_check (err.code, eq, 0); + mlib_check (err.domain, eq, 0); + + // Valid no-op: + bson_error_clear (NULL); +} + +static void +test_bson_error_reset (void) +{ + bson_error_t err; + bson_error_t *eptr = &err; + err.code = 42; + bson_error_reset (eptr); + mlib_check (eptr, ptr_eq, &err); + mlib_check (err.code, eq, 0); + + eptr = NULL; + bson_error_reset (eptr); + mlib_check (eptr != NULL, because, "bson_error_reset sets null pointers to non-null"); + mlib_check (eptr->code, eq, 0); +} + static void test_bson_strerror_r (void) { @@ -50,5 +81,7 @@ void test_bson_error_install (TestSuite *suite) { TestSuite_Add (suite, "/bson/error/basic", test_bson_error_basic); + TestSuite_Add (suite, "/bson/error/clear", test_bson_error_clear); + TestSuite_Add (suite, "/bson/error/reset", test_bson_error_reset); TestSuite_Add (suite, "/bson/strerror_r", test_bson_strerror_r); } diff --git 
a/src/libmongoc/doc/mongoc_uri_set_compressors.rst b/src/libmongoc/doc/mongoc_uri_set_compressors.rst index fbe4ede7f60..7fbd5b85a4b 100644 --- a/src/libmongoc/doc/mongoc_uri_set_compressors.rst +++ b/src/libmongoc/doc/mongoc_uri_set_compressors.rst @@ -15,7 +15,9 @@ Parameters ---------- * ``uri``: A :symbol:`mongoc_uri_t`. -* ``compressors``: A string consisting of one or more comma (,) separated compressors (e.g. "snappy,zlib") or ``NULL``. Passing ``NULL`` clears any existing compressors set on ``uri``. +* ``compressors``: A string consisting of one or more comma (,) separated + compressor names (e.g. "snappy,zlib") or ``NULL``. Passing ``NULL`` or an + empty string clears any existing compressors set on ``uri``. Description ----------- diff --git a/src/libmongoc/src/mongoc/mongoc-client.c b/src/libmongoc/src/mongoc/mongoc-client.c index a663744dbb7..47dfeca45f6 100644 --- a/src/libmongoc/src/mongoc/mongoc-client.c +++ b/src/libmongoc/src/mongoc/mongoc-client.c @@ -57,6 +57,7 @@ #include #include #include +#include #ifdef MONGOC_ENABLE_SSL #include @@ -116,7 +117,8 @@ srv_callback (const char *hostname, PDNS_RECORD pdns, mongoc_rr_data_t *rr_data, _mongoc_host_list_remove_host (&(rr_data->hosts), pdns->Data.SRV.pNameTarget, pdns->Data.SRV.wPort); } - if (!_mongoc_host_list_from_hostport_with_err (&new_host, pdns->Data.SRV.pNameTarget, pdns->Data.SRV.wPort, error)) { + if (!_mongoc_host_list_from_hostport_with_err ( + &new_host, mstr_cstring (pdns->Data.SRV.pNameTarget), pdns->Data.SRV.wPort, error)) { return false; } _mongoc_host_list_upsert (&rr_data->hosts, &new_host); @@ -311,7 +313,7 @@ srv_callback (const char *hostname, ns_msg *ns_answer, ns_rr *rr, mongoc_rr_data DNS_ERROR ("Invalid record in SRV answer for \"%s\": \"%s\"", hostname, _mongoc_hstrerror (h_errno)); } - if (!_mongoc_host_list_from_hostport_with_err (&new_host, name, port, error)) { + if (!_mongoc_host_list_from_hostport_with_err (&new_host, mstr_cstring (name), port, error)) { GOTO (done);
} _mongoc_host_list_upsert (&rr_data->hosts, &new_host); diff --git a/src/libmongoc/src/mongoc/mongoc-cluster.c b/src/libmongoc/src/mongoc/mongoc-cluster.c index 1c38a423aaf..14c447e7a7c 100644 --- a/src/libmongoc/src/mongoc/mongoc-cluster.c +++ b/src/libmongoc/src/mongoc/mongoc-cluster.c @@ -493,7 +493,6 @@ mongoc_cluster_run_command_monitored (mongoc_cluster_t *cluster, mongoc_cmd_t *c int64_t started = bson_get_monotonic_time (); const mongoc_server_stream_t *server_stream; bson_t reply_local; - bson_error_t error_local; bson_iter_t iter; bson_t encrypted = BSON_INITIALIZER; bson_t decrypted = BSON_INITIALIZER; @@ -508,9 +507,7 @@ mongoc_cluster_run_command_monitored (mongoc_cluster_t *cluster, mongoc_cmd_t *c if (!reply) { reply = &reply_local; } - if (!error) { - error = &error_local; - } + bson_error_reset (error); if (_mongoc_cse_is_enabled (cluster->client)) { bson_destroy (&encrypted); diff --git a/src/libmongoc/src/mongoc/mongoc-collection.c b/src/libmongoc/src/mongoc/mongoc-collection.c index 21f04d8f725..c1a87ffafe7 100644 --- a/src/libmongoc/src/mongoc/mongoc-collection.c +++ b/src/libmongoc/src/mongoc/mongoc-collection.c @@ -727,13 +727,7 @@ drop_with_opts_with_encryptedFields (mongoc_collection_t *collection, mongoc_collection_t *ecocCollection = NULL; bool ok = false; const char *name = mongoc_collection_get_name (collection); - bson_error_t local_error = {0}; - - if (!error) { - /* If no error is passed, use a local error. Error codes are checked - * when collections are dropped. */ - error = &local_error; - } + bson_error_reset (error); /* Drop ESC collection. 
*/ escName = _mongoc_get_encryptedField_state_collection (encryptedFields, name, "esc", error); diff --git a/src/libmongoc/src/mongoc/mongoc-compression-private.h b/src/libmongoc/src/mongoc/mongoc-compression-private.h index 58083b2d3bf..6f12f39f803 100644 --- a/src/libmongoc/src/mongoc/mongoc-compression-private.h +++ b/src/libmongoc/src/mongoc/mongoc-compression-private.h @@ -20,6 +20,7 @@ #ifndef MONGOC_COMPRESSION_PRIVATE_H #define MONGOC_COMPRESSION_PRIVATE_H +#include #include /* Compressor IDs */ @@ -43,7 +44,7 @@ size_t mongoc_compressor_max_compressed_length (int32_t compressor_id, size_t size); bool -mongoc_compressor_supported (const char *compressor); +mongoc_compressor_supported (mstr_view compressor); const char * mongoc_compressor_id_to_name (int32_t compressor_id); diff --git a/src/libmongoc/src/mongoc/mongoc-compression.c b/src/libmongoc/src/mongoc/mongoc-compression.c index c711168ea3b..7f0fdd87f44 100644 --- a/src/libmongoc/src/mongoc/mongoc-compression.c +++ b/src/libmongoc/src/mongoc/mongoc-compression.c @@ -62,30 +62,33 @@ mongoc_compressor_max_compressed_length (int32_t compressor_id, size_t len) } bool -mongoc_compressor_supported (const char *compressor) +mongoc_compressor_supported (mstr_view compressor) { + bool have_snappy = false, have_zlib = false, have_zstd = false; #ifdef MONGOC_ENABLE_COMPRESSION_SNAPPY - if (!strcasecmp (compressor, MONGOC_COMPRESSOR_SNAPPY_STR)) { - return true; - } + have_snappy = true; #endif - #ifdef MONGOC_ENABLE_COMPRESSION_ZLIB - if (!strcasecmp (compressor, MONGOC_COMPRESSOR_ZLIB_STR)) { - return true; - } + have_zlib = true; #endif - #ifdef MONGOC_ENABLE_COMPRESSION_ZSTD - if (!strcasecmp (compressor, MONGOC_COMPRESSOR_ZSTD_STR)) { - return true; - } + have_zstd = true; #endif - if (!strcasecmp (compressor, MONGOC_COMPRESSOR_NOOP_STR)) { - return true; + if (mstr_latin_casecmp (compressor, ==, mstr_cstring ("snappy"))) { + return have_snappy; + } + if (mstr_latin_casecmp (compressor, ==, mstr_cstring 
("zlib"))) { + return have_zlib; + } + if (mstr_latin_casecmp (compressor, ==, mstr_cstring ("zstd"))) { + return have_zstd; + } + if (mstr_latin_casecmp (compressor, ==, mstr_cstring ("noop"))) { + return true; // We always have "noop" } + // Any other compressor name is unrecognized return false; } diff --git a/src/libmongoc/src/mongoc/mongoc-error.c b/src/libmongoc/src/mongoc/mongoc-error.c index e25ac32670a..8d52d634c25 100644 --- a/src/libmongoc/src/mongoc/mongoc-error.c +++ b/src/libmongoc/src/mongoc/mongoc-error.c @@ -349,7 +349,11 @@ _mongoc_set_error (bson_error_t *error, uint32_t domain, uint32_t code, const ch va_list args; va_start (args, format); - bson_vsnprintf (error->message, sizeof error->message, format, args); + // Format into a temporary buf before copying into the error, as the existing + // error message may be an input to our formatting string + char buffer[sizeof (error->message)] = {0}; + bson_vsnprintf (buffer, sizeof error->message, format, args); + memcpy (&error->message, buffer, sizeof buffer); va_end (args); } } @@ -365,7 +369,9 @@ _mongoc_set_error_with_category ( va_list args; va_start (args, format); - bson_vsnprintf (error->message, sizeof error->message, format, args); + char buffer[sizeof (error->message)] = {0}; + bson_vsnprintf (buffer, sizeof error->message, format, args); + memcpy (&error->message, buffer, sizeof buffer); va_end (args); } } diff --git a/src/libmongoc/src/mongoc/mongoc-host-list-private.h b/src/libmongoc/src/mongoc/mongoc-host-list-private.h index 9c78e1d417c..4acaccd46e6 100644 --- a/src/libmongoc/src/mongoc/mongoc-host-list-private.h +++ b/src/libmongoc/src/mongoc/mongoc-host-list-private.h @@ -20,6 +20,7 @@ #define MONGOC_HOST_LIST_PRIVATE_H #include +#include BSON_BEGIN_DECLS @@ -38,7 +39,7 @@ _mongoc_host_list_from_string_with_err (mongoc_host_list_t *host_list, const cha bool _mongoc_host_list_from_hostport_with_err (mongoc_host_list_t *host_list, - const char *host, + mstr_view host, uint16_t port, 
bson_error_t *error); diff --git a/src/libmongoc/src/mongoc/mongoc-host-list.c b/src/libmongoc/src/mongoc/mongoc-host-list.c index 46248dee1dc..599ba396ad6 100644 --- a/src/libmongoc/src/mongoc/mongoc-host-list.c +++ b/src/libmongoc/src/mongoc/mongoc-host-list.c @@ -16,12 +16,12 @@ #include // PRIu16 -#include #include +#include /* strcasecmp on windows */ +#include #include #include -#include static mongoc_host_list_t * _mongoc_host_list_find_host_and_port (mongoc_host_list_t *hosts, const char *host_and_port) @@ -77,7 +77,6 @@ _mongoc_host_list_upsert (mongoc_host_list_t **list, const mongoc_host_list_t *n link->next = next_link; } - /* Duplicates a host list. */ mongoc_host_list_t * @@ -138,7 +137,6 @@ _mongoc_host_list_contains_one (mongoc_host_list_t *host_list, mongoc_host_list_ return NULL != _mongoc_host_list_find_host_and_port (host_list, host->host_and_port); } - /* *-------------------------------------------------------------------------- * @@ -181,113 +179,120 @@ _mongoc_host_list_from_string (mongoc_host_list_t *link_, const char *address) return true; } -bool -_mongoc_host_list_from_string_with_err (mongoc_host_list_t *link_, const char *address, bson_error_t *error) +static inline bool +_parse_host_ipv6 (mongoc_host_list_t *link, mstr_view addr, bson_error_t *error) { - char *close_bracket; - char *sport; - uint16_t port; - char *host; - bool ret; - bool ipv6 = false; - - close_bracket = strchr (address, ']'); - - /* if this is an ipv6 address. 
*/ - if (close_bracket) { - /* if present, the port should immediately follow after ] */ - sport = strchr (close_bracket, ':'); - if (sport > close_bracket + 1) { + bson_error_reset (error); + _mongoc_set_error (error, 0, 0, "Invalid IPv6 literal address '%.*s'", MSTR_FMT (addr)); + // Find the opening bracket (must be the first char) + const size_t open_square_pos = mstr_find (addr, mstr_cstring ("["), 0, 1); + if (open_square_pos != 0) { + _mongoc_set_error (error, + MONGOC_ERROR_COMMAND, + MONGOC_ERROR_COMMAND_INVALID_ARG, + "%s: Must start with a bracket '['", + error->message); + return false; + } + // Find the closing bracket + const size_t close_square_pos = mstr_find (addr, mstr_cstring ("]")); + if (close_square_pos == SIZE_MAX) { + // Closing bracket is missing + _mongoc_set_error (error, + MONGOC_ERROR_COMMAND, + MONGOC_ERROR_COMMAND_INVALID_ARG, + "%s: Missing closing bracket ']'", + error->message); + return false; + } + // Find the port delimiter, if present. It must be the next character + const size_t port_delim_pos = mstr_find (addr, mstr_cstring (":"), close_square_pos + 1, 1); + + if (port_delim_pos == SIZE_MAX) { + // There is no port specifier, or it is misplaced, so the closing bracket + // should be the final character: + if (close_square_pos != addr.len - 1) { _mongoc_set_error (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, - "If present, port should immediately follow the \"]\"" - "in an IPv6 address"); + "%s: Invalid trailing content following closing bracket ']'", + error->message); return false; } + } - /* otherwise ] should be the last char.
*/ - if (!sport && *(close_bracket + 1) != '\0') { + uint16_t port = MONGOC_DEFAULT_PORT; + if (port_delim_pos != SIZE_MAX) { + bson_error_t err2; + const mstr_view port_str = mstr_substr (addr, port_delim_pos + 1); + if (!_mongoc_parse_port (port_str, &port, &err2)) { _mongoc_set_error (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, - "If port is not supplied, \"[\" should be the last" - "character"); + "%s: Invalid port '%.*s': %s", + error->message, + MSTR_FMT (port_str), + err2.message); return false; } + } - if (*address != '[') { - _mongoc_set_error ( - error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Missing matching bracket \"[\""); - return false; - } + return _mongoc_host_list_from_hostport_with_err ( + link, mstr_slice (addr, open_square_pos + 1, close_square_pos), port, error); +} - ipv6 = true; - } - /* otherwise, just find the first : */ - else { - sport = strchr (address, ':'); +static inline bool +_parse_host (mongoc_host_list_t *link, mstr_view spec, bson_error_t *error) +{ + if (mstr_contains (spec, mstr_cstring ("]"))) { + // There is a "]" bracket, so this is probably an IPv6 literal, which is + // more strict + return _parse_host_ipv6 (link, spec, error); } - - /* like "example.com:27019" or "[fe80::1]:27019", but not "[fe80::1]" */ - if (sport) { - if (sport == address) { - /* bad address like ":27017" */ + // Parsing anything else is simpler. + uint16_t port = MONGOC_DEFAULT_PORT; + // Try to split around the port delimiter: + mstr_view hostname, port_str; + if (mstr_split_around (spec, mstr_cstring (":"), &hostname, &port_str)) { + // We have a ":" delimiter. 
Try to parse it as a port number: + bson_error_t e2; + if (!_mongoc_parse_port (port_str, &port, &e2)) { + // Invalid port number _mongoc_set_error (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, - "Bad address, \":\" should not be first character"); + "Invalid host specifier '%.*s': Invalid port string '%.*s': %s", + MSTR_FMT (spec), + MSTR_FMT (port_str), + e2.message); return false; } - - if (!mongoc_parse_port (&port, sport + 1)) { - _mongoc_set_error (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Port could not be parsed"); - return false; - } - - /* if this is an ipv6 address, strip the [ and ] */ - if (ipv6) { - host = bson_strndup (address + 1, close_bracket - address - 1); - } else { - host = bson_strndup (address, sport - address); - } - } else { - /* if this is an ipv6 address, strip the [ and ] */ - if (ipv6) { - host = bson_strndup (address + 1, close_bracket - address - 1); - } else { - host = bson_strdup (address); - } - port = MONGOC_DEFAULT_PORT; } - ret = _mongoc_host_list_from_hostport_with_err (link_, host, port, error); - - bson_free (host); + return _mongoc_host_list_from_hostport_with_err (link, hostname, port, error); +} - return ret; +bool +_mongoc_host_list_from_string_with_err (mongoc_host_list_t *link_, const char *address, bson_error_t *error) +{ + return _parse_host (link_, mstr_cstring (address), error); } bool -_mongoc_host_list_from_hostport_with_err (mongoc_host_list_t *link_, - const char *host, - uint16_t port, - bson_error_t *error) +_mongoc_host_list_from_hostport_with_err (mongoc_host_list_t *link_, mstr_view host, uint16_t port, bson_error_t *error) { - BSON_ASSERT (host); BSON_ASSERT (link_); - size_t host_len = strlen (host); *link_ = (mongoc_host_list_t) { .next = NULL, .port = port, }; - if (host_len == 0) { + if (host.len == 0) { _mongoc_set_error (error, MONGOC_ERROR_STREAM, MONGOC_ERROR_STREAM_NAME_RESOLUTION, "Empty hostname in URI"); return false; } - if (host_len > 
BSON_HOST_NAME_MAX) { + if (host.len > BSON_HOST_NAME_MAX) { _mongoc_set_error (error, MONGOC_ERROR_STREAM, MONGOC_ERROR_STREAM_NAME_RESOLUTION, @@ -296,15 +301,15 @@ _mongoc_host_list_from_hostport_with_err (mongoc_host_list_t *link_, return false; } - bson_strncpy (link_->host, host, host_len + 1); + bson_strncpy (link_->host, host.data, host.len + 1); /* like "fe80::1" or "::1" */ - if (strchr (host, ':')) { + if (mstr_contains (host, mstr_cstring (":"))) { link_->family = AF_INET6; // Check that IPv6 literal is two less than the max to account for `[` and // `]` added below. - if (host_len > BSON_HOST_NAME_MAX - 2) { + if (host.len > BSON_HOST_NAME_MAX - 2) { _mongoc_set_error (error, MONGOC_ERROR_STREAM, MONGOC_ERROR_STREAM_NAME_RESOLUTION, @@ -319,9 +324,9 @@ _mongoc_host_list_from_hostport_with_err (mongoc_host_list_t *link_, BSON_ASSERT (mlib_in_range (size_t, req)); // Use `<`, not `<=` to account for NULL byte. BSON_ASSERT ((size_t) req < sizeof link_->host_and_port); - } else if (strchr (host, '/') && strstr (host, ".sock")) { + } else if (mstr_contains (host, mstr_cstring ("/")) && mstr_contains (host, mstr_cstring (".sock"))) { link_->family = AF_UNIX; - bson_strncpy (link_->host_and_port, link_->host, host_len + 1); + bson_strncpy (link_->host_and_port, link_->host, host.len + 1); } else { /* This is either an IPv4 or hostname. 
*/ link_->family = AF_UNSPEC; diff --git a/src/libmongoc/src/mongoc/mongoc-http.c b/src/libmongoc/src/mongoc/mongoc-http.c index 7d8eff032af..dffa99440b3 100644 --- a/src/libmongoc/src/mongoc/mongoc-http.c +++ b/src/libmongoc/src/mongoc/mongoc-http.c @@ -121,7 +121,7 @@ _mongoc_http_send (const mongoc_http_request_t *req, memset (res, 0, sizeof (*res)); _mongoc_buffer_init (&http_response_buf, NULL, 0, NULL, NULL); - if (!_mongoc_host_list_from_hostport_with_err (&host_list, req->host, (uint16_t) req->port, error)) { + if (!_mongoc_host_list_from_hostport_with_err (&host_list, mstr_cstring (req->host), (uint16_t) req->port, error)) { goto fail; } diff --git a/src/libmongoc/src/mongoc/mongoc-topology.c b/src/libmongoc/src/mongoc/mongoc-topology.c index eae487c40a1..4931c24cd62 100644 --- a/src/libmongoc/src/mongoc/mongoc-topology.c +++ b/src/libmongoc/src/mongoc/mongoc-topology.c @@ -520,8 +520,8 @@ mongoc_topology_new (const mongoc_uri_t *uri, bool single_threaded) /* Use rr_data to update the topology's URI. 
*/ if (rr_data.txt_record_opts && - !mongoc_uri_parse_options ( - topology->uri, rr_data.txt_record_opts, true /* from_dns */, &topology->scanner->error)) { + !_mongoc_uri_apply_query_string ( + topology->uri, mstr_cstring (rr_data.txt_record_opts), true /* from_dns */, &topology->scanner->error)) { GOTO (srv_fail); } diff --git a/src/libmongoc/src/mongoc/mongoc-ts-pool.c b/src/libmongoc/src/mongoc/mongoc-ts-pool.c index 7f15e1752be..ea73a8c7467 100644 --- a/src/libmongoc/src/mongoc/mongoc-ts-pool.c +++ b/src/libmongoc/src/mongoc/mongoc-ts-pool.c @@ -152,16 +152,7 @@ _new_item (mongoc_ts_pool *pool, bson_error_t *error) node->owner_pool = pool; if (pool->params.constructor) { /* To construct, we need to know if that constructor fails */ - bson_error_t my_error; - if (!error) { - /* Caller doesn't care about the error, but we care in case the - * constructor might fail */ - error = &my_error; - } - /* Clear the error */ - error->code = 0; - error->domain = 0; - error->message[0] = 0; + bson_error_reset (error); /* Construct the object */ pool->params.constructor (_pool_node_get_data (node), pool->params.userdata, error); if (error->code != 0) { diff --git a/src/libmongoc/src/mongoc/mongoc-uri-private.h b/src/libmongoc/src/mongoc/mongoc-uri-private.h index 2fed6078274..9de0273edc0 100644 --- a/src/libmongoc/src/mongoc/mongoc-uri-private.h +++ b/src/libmongoc/src/mongoc/mongoc-uri-private.h @@ -19,6 +19,7 @@ #ifndef MONGOC_URI_PRIVATE_H #define MONGOC_URI_PRIVATE_H +#include #include #include #include @@ -34,10 +35,19 @@ mongoc_uri_upsert_host (mongoc_uri_t *uri, const char *host, uint16_t port, bson void mongoc_uri_remove_host (mongoc_uri_t *uri, const char *host, uint16_t port); +/** + * @brief Update the settings on a URI based on a URI query string + * + * @param uri The URI to be updated + * @param options A string of key-value pairs, separated by "&", URI %-encoded. + * This should not include the leading "?" 
+ * @param from_dns Whether this string comes from a DNS query + * @return true Upon success. The internal settings of the URI object have been updated + * @return false Otherwise. The internal settings of the URI object are unspecified. + */ bool -mongoc_uri_parse_host (mongoc_uri_t *uri, const char *str); -bool -mongoc_uri_parse_options (mongoc_uri_t *uri, const char *str, bool from_dns, bson_error_t *error); +_mongoc_uri_apply_query_string (mongoc_uri_t *uri, mstr_view options, bool from_dns, bson_error_t *error); + int32_t mongoc_uri_get_local_threshold_option (const mongoc_uri_t *uri); diff --git a/src/libmongoc/src/mongoc/mongoc-uri.c b/src/libmongoc/src/mongoc/mongoc-uri.c index b1e95ab601e..40e6b691d29 100644 --- a/src/libmongoc/src/mongoc/mongoc-uri.c +++ b/src/libmongoc/src/mongoc/mongoc-uri.c @@ -24,6 +24,8 @@ /* strcasecmp on windows */ #include +#include +#include #include #include #include @@ -60,6 +62,13 @@ struct _mongoc_uri_t { mongoc_write_concern_t *write_concern; }; +// Common strings we need to look for +static const mstr_view COLON = {":", 1}; +static const mstr_view COMMA = {",", 1}; +static const mstr_view QUESTION = {"?", 1}; +static const mstr_view SLASH = {"/", 1}; +static const mstr_view AT = {"@", 1}; + #define MONGOC_URI_ERROR(error, format, ...) 
\ _mongoc_set_error (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, format, __VA_ARGS__) @@ -76,17 +85,6 @@ _mongoc_uri_set_option_as_int32_with_error (mongoc_uri_t *uri, const char *optio static bool _mongoc_uri_set_option_as_int64_with_error (mongoc_uri_t *uri, const char *option, int64_t value, bson_error_t *error); -static void -mongoc_uri_do_unescape (char **str) -{ - char *tmp; - - if ((tmp = *str)) { - *str = mongoc_uri_unescape (tmp); - bson_free (tmp); - } -} - #define VALIDATE_SRV_ERR() \ do { \ @@ -194,7 +192,7 @@ mongoc_uri_upsert_host (mongoc_uri_t *uri, const char *host, uint16_t port, bson mongoc_host_list_t temp; memset (&temp, 0, sizeof (mongoc_host_list_t)); - if (!_mongoc_host_list_from_hostport_with_err (&temp, host, port, error)) { + if (!_mongoc_host_list_from_hostport_with_err (&temp, mstr_cstring (host), port, error)) { return false; } @@ -207,134 +205,125 @@ mongoc_uri_remove_host (mongoc_uri_t *uri, const char *host, uint16_t port) _mongoc_host_list_remove_host (&(uri->hosts), host, port); } -/* - *-------------------------------------------------------------------------- - * - * scan_to_unichar -- - * - * Scans 'str' until either a character matching 'match' is found, - * until one of the characters in 'terminators' is encountered, or - * until we reach the end of 'str'. - * - * NOTE: 'terminators' may not include multibyte UTF-8 characters. - * - * Returns: - * If 'match' is found, returns a copy of the section of 'str' before - * that character. Otherwise, returns NULL. - * - * Side Effects: - * If 'match' is found, sets 'end' to begin at the matching character - * in 'str'. 
+ +/** + * @brief %-decode a %-encoded string * - *-------------------------------------------------------------------------- + * @param sv The string to be decoded + * @return char* A pointer to a new C string, which must be freed with `bson_free`, + * or a null pointer in case of error */ - static char * -scan_to_unichar (const char *str, bson_unichar_t match, const char *terminators, const char **end) -{ - bson_unichar_t c; - const char *iter; - - for (iter = str; iter && *iter && (c = bson_utf8_get_char (iter)); iter = bson_utf8_next_char (iter)) { - if (c == match) { - *end = iter; - return bson_strndup (str, iter - str); - } else if (c == '\\') { - iter = bson_utf8_next_char (iter); - if (!bson_utf8_get_char (iter)) { - break; - } - } else { - const char *term_iter; - for (term_iter = terminators; *term_iter; term_iter++) { - if (mlib_cmp (c, ==, *term_iter)) { - return NULL; - } - } - } +_strdup_pct_decode (mstr_view const sv, bson_error_t *error) +{ + // Compute how many bytes we want to store + size_t bufsize = 0; + // Must use safe arithmetic because a pathological sv with `len == SIZE_MAX` is possible + bool add_okay = !mlib_add (&bufsize, sv.len, 1); + // Prepare the output region. We can allocate the whole thing up-front, because + // we know the decode result will be *at most* as long as `sv`, since %-encoding + // can only ever grow the plaintext string + char *const buf = add_okay ? 
bson_malloc0 (bufsize) : NULL; + // alloc or arithmetic failure + if (!buf) { + MONGOC_URI_ERROR (error, "%s", "Failed to allocate memory for the %%-decoding"); + return NULL; } - return NULL; -} - + // char-wise output + char *out = buf; + // Consume the input as we go + mstr_view remain = sv; + while (remain.len) { + if (remain.data[0] != '%') { + // Not a % char, just append it + *out++ = remain.data[0]; + remain = mstr_substr (remain, 1); + continue; + } + // %-sequence + if (remain.len < 3) { + MONGOC_URI_ERROR (error, + "At offset %llu: Truncated %%-sequence \"%.*s\"", + (long long unsigned) (sv.len - remain.len), + MSTR_FMT (remain)); + bson_free (buf); + return NULL; + } + // Grab the next two chars + mstr_view pair = mstr_substr (remain, 1, 2); + uint64_t v; + if (mlib_nat64_parse (pair, 16, &v)) { + MONGOC_URI_ERROR (error, + "At offset %llu: Invalid %%-sequence \"%.3s\"", + (long long unsigned) (sv.len - remain.len), + remain.data); + bson_free (buf); + return NULL; + } -static bool -mongoc_uri_parse_scheme (mongoc_uri_t *uri, const char *str, const char **end) -{ - if (!strncmp (str, "mongodb+srv://", 14)) { - uri->is_srv = true; - *end = str + 14; - return true; + // Append the decoded byte value + *out++ = (char) v; + // Drop the "%xy" sequence + remain = mstr_substr (remain, 3); } - if (!strncmp (str, "mongodb://", 10)) { - uri->is_srv = false; - *end = str + 10; - return true; - } - - return false; -} - - -static bool -mongoc_uri_has_unescaped_chars (const char *str, const char *chars) -{ - const char *c; - const char *tmp; - char *s; - - for (c = chars; *c; c++) { - s = scan_to_unichar (str, (bson_unichar_t) *c, "", &tmp); - if (s) { - bson_free (s); - return true; - } + // Check whether the decoded result is valid UTF-8 + size_t len = (size_t) (out - buf); + if (!bson_utf8_validate (buf, len, false)) { + MONGOC_URI_ERROR ( + error, "%s", "Invalid %%-encoded string: The decoded result is not valid UTF-8 or contains null characters"); + bson_free 
(buf); + return NULL; } - return false; + return buf; } -/* "str" is non-NULL, the part of URI between "mongodb://" and first "@" */ +/** + * @brief Parse the userinfo segment from a URI string + * + * @param uri The URI to be updated + * @param userpass The userinfo segment from the original URI string + * @return true If the operation succeeds + * @return false Otherwise + */ static bool -mongoc_uri_parse_userpass (mongoc_uri_t *uri, const char *str, bson_error_t *error) +_uri_parse_userinfo (mongoc_uri_t *uri, mstr_view userpass, bson_error_t *error) { - const char *prohibited = "@:/"; - const char *end_user; - - BSON_ASSERT (str); + bson_error_reset (error); BSON_ASSERT (uri); - if ((uri->username = scan_to_unichar (str, ':', "", &end_user))) { - uri->password = bson_strdup (end_user + 1); - } else { - uri->username = bson_strdup (str); - uri->password = NULL; - } + // Split the user/pass around the colon: + mstr_view username, password; + const bool has_password = mstr_split_around (userpass, COLON, &username, &password); - if (mongoc_uri_has_unescaped_chars (uri->username, prohibited)) { - MONGOC_URI_ERROR (error, "Username \"%s\" must not have unescaped chars. %s", uri->username, escape_instructions); + // Check if the username has invalid unescaped characters + const mstr_view PROHIBITED_CHARS = mstr_cstring ("@:/"); + if (mstr_find_first_of (username, PROHIBITED_CHARS) != SIZE_MAX) { + MONGOC_URI_ERROR ( + error, "Username \"%.*s\" must not have unescaped chars. %s", MSTR_FMT (username), escape_instructions); + return false; + } + if (mstr_find_first_of (password, PROHIBITED_CHARS) != SIZE_MAX) { + MONGOC_URI_ERROR ( + error, "Password \"%.*s\" must not have unescaped chars. 
%s", MSTR_FMT (password), escape_instructions); return false; } - mongoc_uri_do_unescape (&uri->username); + // Store the username and password on the URI + uri->username = _strdup_pct_decode (username, error); if (!uri->username) { - MONGOC_URI_ERROR (error, "Incorrect URI escapes in username. %s", escape_instructions); + MONGOC_URI_ERROR (error, "Invalid username \"%.*s\" in URI string: %s", MSTR_FMT (username), error->message); return false; } /* Providing password at all is optional */ - if (uri->password) { - if (mongoc_uri_has_unescaped_chars (uri->password, prohibited)) { - MONGOC_URI_ERROR ( - error, "Password \"%s\" must not have unescaped chars. %s", uri->password, escape_instructions); - return false; - } - - mongoc_uri_do_unescape (&uri->password); + if (has_password) { + uri->password = _strdup_pct_decode (password, error); if (!uri->password) { - MONGOC_URI_ERROR (error, "%s", "Incorrect URI escapes in password"); + MONGOC_URI_ERROR (error, "Invalid password \"%.*s\" in URI string: %s", MSTR_FMT (password), error->message); return false; } } @@ -342,55 +331,64 @@ mongoc_uri_parse_userpass (mongoc_uri_t *uri, const char *str, bson_error_t *err return true; } -bool -mongoc_uri_parse_host (mongoc_uri_t *uri, const char *host_and_port_in) +/** + * @brief Parse a single host specifier for a URI + * + * @param uri The URI object to be updated + * @param hostport A host specifier, with an optional port + * @return true If the operation succeeds + * @return false Otherwise + */ +static bool +_parse_one_host (mongoc_uri_t *uri, mstr_view hostport, bson_error_t *error) { - char *host_and_port = bson_strdup (host_and_port_in); - bson_error_t err = {0}; - bool r; - - /* unescape host. It doesn't hurt including port. */ - if (mongoc_uri_has_unescaped_chars (host_and_port, "/")) { + bson_error_reset (error); + // Don't allow an unescaped "/" in the host string. + if (mstr_find (hostport, SLASH) != SIZE_MAX) { + // They were probably trying to do a unix socket. 
Those slashes must be escaped MONGOC_WARNING ("Unix Domain Sockets must be escaped (e.g. / = %%2F)"); - bson_free (host_and_port); return false; } - mongoc_uri_do_unescape (&host_and_port); + /* unescape host. It doesn't hurt including port. */ + char *host_and_port = _strdup_pct_decode (hostport, error); if (!host_and_port) { /* invalid */ - bson_free (host_and_port); + MONGOC_URI_ERROR (error, "Invalid host specifier \"%.*s\": %s", MSTR_FMT (hostport), error->message); return false; } - r = mongoc_uri_upsert_host_and_port (uri, host_and_port, &err); - - if (!r) { - MONGOC_ERROR ("%s", err.message); - bson_free (host_and_port); - return false; + const bool okay = mongoc_uri_upsert_host_and_port (uri, host_and_port, error); + if (!okay) { + MONGOC_URI_ERROR (error, "Invalid host specifier \"%s\": %s", host_and_port, error->message); } bson_free (host_and_port); - return true; + return okay; } +/** + * @brief Parse the single SRV host specifier for a URI + * + * @param uri The URI to be updated + * @param str The host string for the URI. 
Should specify a single SRV name + * @return true If the operation succeeds + * @return false Otherwise + */ static bool -mongoc_uri_parse_srv (mongoc_uri_t *uri, const char *str, bson_error_t *error) +_parse_srv_hostname (mongoc_uri_t *uri, mstr_view str, bson_error_t *error) { - if (*str == '\0') { + bson_error_reset (error); + if (str.len == 0) { MONGOC_URI_ERROR (error, "%s", "Missing service name in SRV URI"); return false; } { - char *service = bson_strdup (str); - - mongoc_uri_do_unescape (&service); - + char *service = _strdup_pct_decode (str, error); if (!service || !valid_hostname (service) || count_dots (service) < 2) { - MONGOC_URI_ERROR (error, "%s", "Invalid service name in URI"); + MONGOC_URI_ERROR (error, "Invalid SRV service name \"%.*s\" in URI: %s", MSTR_FMT (str), error->message); bson_free (service); return false; } @@ -414,100 +412,83 @@ mongoc_uri_parse_srv (mongoc_uri_t *uri, const char *str, bson_error_t *error) } -/* "hosts" is non-NULL, the part between "mongodb://" or "@" and last "/" */ +/** + * @brief Parse the comma-separated list of host+port specifiers and store them in `uri` + * + * @param uri The URI object to be updated + * @param hosts A non-empty comma-separated list of host specifiers + * @param error An error object to be updated in case of failure + * @return true If the operation succeeds and at least one host was added to `uri` + * @return false Otherwise. `error` will be updated. + */ static bool -mongoc_uri_parse_hosts (mongoc_uri_t *uri, const char *hosts) -{ - const char *next; - const char *end_hostport; - char *s; - BSON_ASSERT (hosts); - /* - * Parsing the series of hosts is a lot more complicated than you might - * imagine. This is due to some characters being both separators as well as - * valid characters within the "hostname". In particularly, we can have file - * paths to specify paths to UNIX domain sockets. We impose the restriction - * that they must be suffixed with ".sock" to simplify the parsing. 
- * - * You can separate hosts and file system paths to UNIX domain sockets with - * ",". - */ - s = scan_to_unichar (hosts, '?', "", &end_hostport); - if (s) { - MONGOC_WARNING ("%s", "A '/' is required between the host list and any options."); - goto error; - } - next = hosts; - do { - /* makes a copy of the section of the string */ - s = scan_to_unichar (next, ',', "", &end_hostport); - if (s) { - next = (char *) end_hostport + 1; - } else { - s = bson_strdup (next); - next = NULL; - } - if (!mongoc_uri_parse_host (uri, s)) { - goto error; +_parse_hosts_csv (mongoc_uri_t *uri, mstr_view const hosts, bson_error_t *error) +{ + bson_error_reset (error); + // Check if there is a question mark in the given hostinfo string. This indicates that + // the user omitted a required "/" before the query component + if (mstr_find (hosts, QUESTION) != SIZE_MAX) { + MONGOC_URI_ERROR (error, "%s", "A '/' is required between the host list and any options."); + return false; + } + // We require at least one host in the host list in order to be a valid host CSV + if (!hosts.len) { + MONGOC_URI_ERROR (error, "%s", "Host list of URI string cannot be empty"); + return false; + } + + // Split around commas + for (mstr_view remain = hosts; remain.len;) { + mstr_view host; + mstr_split_around (remain, COMMA, &host, &remain); + if (!_parse_one_host (uri, host, error)) { + return false; } - bson_free (s); - } while (next); + } + return true; -error: - bson_free (s); - return false; } -/* ----------------------------------------------------------------------------- +/** + * @brief Handle the URI path component * - * mongoc_uri_parse_database -- + * @param uri The URI object to be updated + * @param path The path component of the original URI string. May be empty if + * there was no path in the input string, but should start with the leading + * slash if it is non-empty. + * @return true If the operation succeeds + * @return false Otherwise * - * Parse the database after @str. 
@str is expected to point after the - * host list to the character immediately after the / in the uri string. - * If no database is specified in the uri, e.g. the uri has a form like: - * mongodb://localhost/?option=X then uri->database remains NULL after - * parsing. - * - * Return: - * True if the parsed database is valid. An empty database is considered - * valid. - * ----------------------------------------------------------------------------- + * We use the URI path to specify the database to be associated with the URI. + * We only expect a single path element. If the path is just a slash "/", then + * that is the same as omitting the path entirely. */ static bool -mongoc_uri_parse_database (mongoc_uri_t *uri, const char *str, const char **end) -{ - const char *end_database; - const char *c; - char *invalid_c; - const char *tmp; - - if ((uri->database = scan_to_unichar (str, '?', "", &end_database))) { - if (strcmp (uri->database, "") == 0) { - /* no database is found, don't store the empty string. */ - bson_free (uri->database); - uri->database = NULL; - /* but it is valid to have an empty database. */ - return true; - } - *end = end_database; - } else if (*str) { - uri->database = bson_strdup (str); - *end = str + strlen (str); +_parse_path (mongoc_uri_t *uri, mstr_view path, bson_error_t *error) +{ + bson_error_reset (error); + // Drop the leading slash, if present. If the URI has no path, then `path` + // will already be an empty string. + const mstr_view relative = path.len ? 
mstr_substr (path, 1) : path; + + if (!relative.len) { + // Empty/absent path is no database + uri->database = NULL; + return true; } - mongoc_uri_do_unescape (&uri->database); + // %-decode the path as the database name + uri->database = _strdup_pct_decode (relative, error); if (!uri->database) { - /* invalid */ + // %-decode failure + MONGOC_URI_ERROR (error, "Invalid database specifier \"%.*s\": %s", MSTR_FMT (relative), error->message); return false; } - /* invalid characters in database name */ - for (c = "/\\. \"$"; *c; c++) { - invalid_c = scan_to_unichar (uri->database, (bson_unichar_t) *c, "", &tmp); - if (invalid_c) { - bson_free (invalid_c); - return false; - } + // Check if the database name contains any invalid characters after the %-decode + if (mstr_contains_any_of (mstr_cstring (uri->database), mstr_cstring ("/\\. \n\r\f\t\"$"))) { + MONGOC_URI_ERROR (error, "Invalid database specifier \"%s\": Contains disallowed characters", uri->database); + return false; } return true; @@ -515,48 +496,20 @@ mongoc_uri_parse_database (mongoc_uri_t *uri, const char *str, const char **end) static bool -mongoc_uri_parse_auth_mechanism_properties (mongoc_uri_t *uri, const char *str) +_parse_and_set_auth_mechanism_properties (mongoc_uri_t *uri, const char *str) { - const char *end_scan; - bson_t properties = BSON_INITIALIZER; - // Key-value pairs are delimited by ','. - for (char *kvp; (kvp = scan_to_unichar (str, ',', "", &end_scan)); bson_free (kvp)) { - str = end_scan + 1; - - char *const key = scan_to_unichar (kvp, ':', "", &end_scan); - - // Found delimiter: split into key and value. - if (key) { - char *const value = bson_strdup (end_scan + 1); - BSON_APPEND_UTF8 (&properties, key, value); - bson_free (key); - bson_free (value); - } - - // No delimiter: entire string is the key. Use empty string as value. - else { - BSON_APPEND_UTF8 (&properties, kvp, ""); - } - } - - // Last (or only) pair. 
- if (*str != '\0') { - char *const key = scan_to_unichar (str, ':', "", &end_scan); - - // Found delimiter: split into key and value. - if (key) { - char *const value = bson_strdup (end_scan + 1); - BSON_APPEND_UTF8 (&properties, key, value); - bson_free (key); - bson_free (value); - } - - // No delimiter: entire string is the key. Use empty string as value. - else { - BSON_APPEND_UTF8 (&properties, str, ""); - } + mstr_view remain = mstr_cstring (str); + while (remain.len) { + // Get the entry until the next comma + mstr_view entry; + mstr_split_around (remain, COMMA, &entry, &remain); + // Split around the colon. If no colon, makes an empty value. + mstr_view key, value; + mstr_split_around (entry, COLON, &key, &value); + // Accumulate properties + bson_append_utf8 (&properties, key.data, (int) key.len, value.data, (int) value.len); } /* append our auth properties to our credentials */ @@ -630,108 +583,110 @@ mongoc_uri_check_srv_service_name (mongoc_uri_t *uri, const char *str) } static bool -mongoc_uri_parse_tags (mongoc_uri_t *uri, /* IN */ - const char *str) /* IN */ -{ - const char *end_keyval; - const char *end_key; - bson_t b; - char *keyval; - char *key; - - bson_init (&b); - -again: - if ((keyval = scan_to_unichar (str, ',', "", &end_keyval))) { - if (!(key = scan_to_unichar (keyval, ':', "", &end_key))) { - bson_free (keyval); +_apply_read_prefs_tags (mongoc_uri_t *uri, /* IN */ + const char *str) /* IN */ +{ + bson_t b = BSON_INITIALIZER; + bool okay = false; + + for (mstr_view remain = mstr_cstring (str); remain.len;) { + mstr_view entry; + mstr_split_around (remain, COMMA, &entry, &remain); + mstr_view key, value; + if (!mstr_split_around (entry, COLON, &key, &value)) { + // The entry does not have a colon. 
This is invalid for tags + MONGOC_WARNING ("Unsupported value for \"" MONGOC_URI_READPREFERENCETAGS "\": \"%s\"", str); goto fail; } - - bson_append_utf8 (&b, key, -1, end_key + 1, -1); - bson_free (key); - bson_free (keyval); - str = end_keyval + 1; - goto again; - } else if ((key = scan_to_unichar (str, ':', "", &end_key))) { - bson_append_utf8 (&b, key, -1, end_key + 1, -1); - bson_free (key); - } else if (strlen (str)) { - /* we're not finished but we couldn't parse the string */ - goto fail; + bson_append_utf8 (&b, key.data, (int) key.len, value.data, (int) value.len); } mongoc_read_prefs_add_tag (uri->read_prefs, &b); - bson_destroy (&b); - - return true; + okay = true; fail: - MONGOC_WARNING ("Unsupported value for \"" MONGOC_URI_READPREFERENCETAGS "\": \"%s\"", str); bson_destroy (&b); - return false; + return okay; } - -/* - *-------------------------------------------------------------------------- - * - * mongoc_uri_bson_append_or_replace_key -- - * - * - * Appends 'option' to the end of 'options' if not already set. - * - * Since we cannot grow utf8 strings inline, we have to allocate a - * temporary bson variable and splice in the new value if the key - * is already set. - * - * NOTE: This function keeps the order of the BSON keys. - * - * NOTE: 'option' is case*in*sensitive. 
- * +/** + * @brief Remove a BSON element with the given key, case-insensitive * - *-------------------------------------------------------------------------- + * @param doc The document to be updated + * @param key The key to be removed */ - static void -mongoc_uri_bson_append_or_replace_key (bson_t *options, const char *option, const char *value) +_bson_erase_icase (bson_t *doc, mstr_view key) { bson_iter_t iter; - bool found = false; + if (!bson_iter_init (&iter, doc)) { + return; + } - if (bson_iter_init (&iter, options)) { - bson_t tmp = BSON_INITIALIZER; + bson_t tmp = BSON_INITIALIZER; + while (bson_iter_next (&iter)) { + if (mstr_latin_casecmp (mstr_cstring (bson_iter_key (&iter)), !=, key)) { + const bson_value_t *const bvalue = bson_iter_value (&iter); + BSON_APPEND_VALUE (&tmp, bson_iter_key (&iter), bvalue); + } + } - while (bson_iter_next (&iter)) { - const bson_value_t *bvalue; + bson_destroy (doc); + bson_copy_to (&tmp, doc); + bson_destroy (&tmp); +} - if (!strcasecmp (bson_iter_key (&iter), option)) { - bson_append_utf8 (&tmp, option, -1, value, -1); - found = true; - continue; - } +/** + * @brief Update a BSON document with a UTF-8 value, replacing it if it already + * exists + * + * @param options The doc to be updated + * @param key The case-insensitive key of the element to be added/updated + * @param value The UTF-8 string that will be inserted, replacing any existing value + * + * @note This will case-normalize the key string to lowercase before inserting it. + */ +static void +_bson_upsert_utf8_icase (bson_t *options, mstr_view key, const char *value) +{ + _bson_erase_icase (options, key); - bvalue = bson_iter_value (&iter); - BSON_APPEND_VALUE (&tmp, bson_iter_key (&iter), bvalue); - } + // Lowercase the key, preventing the need for all callers to do this normalization + // themselves. 
+ char *lower = bson_strndup (key.data, key.len); + mongoc_lowercase_inplace (lower); + bson_append_utf8 (options, lower, -1, value, -1); + bson_free (lower); +} - if (!found) { - bson_append_utf8 (&tmp, option, -1, value, -1); +/** + * @brief Initialize an iterator to point to the named element, case-insensitive + * + * @param iter Storage for an iterator to be updated + * @param doc The document to be searched + * @param key The key to find, case-insensitive + * @return true If the element was found, and `*iter` is updated + * @return false Otherwise + */ +static inline bool +_bson_init_iter_find_icase (bson_iter_t *iter, bson_t const *doc, mstr_view key) +{ + if (!bson_iter_init (iter, doc)) { + return false; + } + while (bson_iter_next (iter)) { + if (mstr_latin_casecmp (mstr_cstring (bson_iter_key (iter)), ==, key)) { + return true; } - - bson_destroy (options); - bson_copy_to (&tmp, options); - bson_destroy (&tmp); } + return false; } - bool mongoc_uri_has_option (const mongoc_uri_t *uri, const char *key) { bson_iter_t iter; - - return bson_iter_init_find_case (&iter, &uri->options, key); + return _bson_init_iter_find_icase (&iter, &uri->options, mstr_cstring (key)); } bool @@ -808,118 +763,94 @@ mongoc_uri_canonicalize_option (const char *key) } } +/** + * @brief Test whether the given URI parameter is allowed to be specified in + * a DNS record. 
+ * + * @param key The parameter key string, case-insensitive + * @return true If the option is valid in a DNS record + * @return false Otherwise + */ static bool -_mongoc_uri_parse_int64 (const char *key, const char *value, int64_t *result) -{ - char *endptr; - int64_t i; - - errno = 0; - i = bson_ascii_strtoll (value, &endptr, 10); - if (errno || endptr < value + strlen (value)) { - MONGOC_WARNING ("Invalid %s: cannot parse integer\n", key); - return false; - } - - *result = i; - return true; -} - - -static bool -mongoc_uri_parse_int32 (const char *key, const char *value, int32_t *result) -{ - int64_t i; - - if (!_mongoc_uri_parse_int64 (key, value, &i)) { - /* _mongoc_uri_parse_int64 emits a warning if it could not parse the - * given value, so we don't have to add one here. - */ - return false; - } - - if (i > INT32_MAX || i < INT32_MIN) { - MONGOC_WARNING ("Invalid %s: cannot fit in int32\n", key); - return false; - } - - *result = (int32_t) i; - return true; -} - - -static bool -dns_option_allowed (const char *lkey) +dns_option_allowed (mstr_view key) { /* Initial DNS Seedlist Discovery Spec: "A Client MUST only support the * authSource, replicaSet, and loadBalanced options through a TXT record, and * MUST raise an error if any other option is encountered." */ - return !strcmp (lkey, MONGOC_URI_AUTHSOURCE) || !strcmp (lkey, MONGOC_URI_REPLICASET) || - !strcmp (lkey, MONGOC_URI_LOADBALANCED); + return mstr_latin_casecmp (key, ==, mstr_cstring (MONGOC_URI_AUTHSOURCE)) || + mstr_latin_casecmp (key, ==, mstr_cstring (MONGOC_URI_LOADBALANCED)) || + mstr_latin_casecmp (key, ==, mstr_cstring (MONGOC_URI_REPLICASET)); } - -/* Decompose a key=val pair and place them into a document. - * Includes case-folding for key portion. +/** + * @brief Apply a single query parameter to a URI from a string + * + * @param uri The object to be updated. + * @param options The URI options data that will also be updated. 
+ * @param str The percent-encoded query string element to be decoded. + * @param from_dns Whether this query string comes from a DNS result + * @retval true Upon success + * @retval false Otherwise, and sets `*error` */ static bool -mongoc_uri_split_option (mongoc_uri_t *uri, bson_t *options, const char *str, bool from_dns, bson_error_t *error) +_handle_pct_uri_query_param (mongoc_uri_t *uri, bson_t *options, mstr_view str, bool from_dns, bson_error_t *error) { - bson_iter_t iter; - const char *end_key; - char *key = NULL; - char *lkey = NULL; + bson_error_reset (error); + // The argument value, with percent-encoding removed char *value = NULL; - const char *opt; - char *opt_end; - size_t opt_len; + // Whether the operation succeeded bool ret = false; - if (!(key = scan_to_unichar (str, '=', "", &end_key))) { - MONGOC_URI_ERROR (error, "URI option \"%s\" contains no \"=\" sign", str); - goto CLEANUP; - } - - value = bson_strdup (end_key + 1); - mongoc_uri_do_unescape (&value); - if (!value) { - /* do_unescape detected invalid UTF-8 and freed value */ - MONGOC_URI_ERROR (error, "Value for URI option \"%s\" contains invalid UTF-8", key); - goto CLEANUP; + mstr_view key, val_pct; + if (!mstr_split_around (str, mstr_cstring ("="), &key, &val_pct)) { + MONGOC_URI_ERROR (error, "URI option \"%.*s\" contains no \"=\" sign", MSTR_FMT (str)); + goto done; } - lkey = bson_strdup (key); - mongoc_lowercase (key, lkey); - /* Initial DNS Seedlist Discovery Spec: "A Client MUST only support the * authSource, replicaSet, and loadBalanced options through a TXT record, and * MUST raise an error if any other option is encountered."*/ - if (from_dns && !dns_option_allowed (lkey)) { - MONGOC_URI_ERROR (error, "URI option \"%s\" prohibited in TXT record", key); - goto CLEANUP; + if (from_dns && !dns_option_allowed (key)) { + MONGOC_URI_ERROR (error, "URI option \"%.*s\" prohibited in TXT records", MSTR_FMT (key)); + goto done; + } + + value = _strdup_pct_decode (val_pct, error); + if 
(!value) { + /* do_unescape detected invalid UTF-8 and freed value */ + MONGOC_URI_ERROR (error, "Value for URI option \"%.*s\" contains is invalid: %s", MSTR_FMT (key), error->message); + goto done; } - /* Special case: READPREFERENCETAGS is a composing option. + /* Special case: readPreferenceTags is a composing option. * Multiple instances should append, not overwrite. * Encode them directly to the options field, * bypassing canonicalization and duplicate checks. */ - if (!strcmp (lkey, MONGOC_URI_READPREFERENCETAGS)) { - if (!mongoc_uri_parse_tags (uri, value)) { - MONGOC_URI_ERROR (error, "Unsupported value for \"%s\": \"%s\"", key, value); - goto CLEANUP; + if (mstr_latin_casecmp (key, ==, mstr_cstring (MONGOC_URI_READPREFERENCETAGS))) { + if (!_apply_read_prefs_tags (uri, value)) { + MONGOC_URI_ERROR (error, "Unsupported value for \"%.*s\": \"%s\"", MSTR_FMT (key), value); + goto done; + } else { + ret = true; + goto done; } - } else if (bson_iter_init_find (&iter, &uri->raw, lkey) || bson_iter_init_find (&iter, options, lkey)) { + } + + // Handle case where the option has already been specified + bson_iter_t iter; + if (_bson_init_iter_find_icase (&iter, &uri->raw, key) || _bson_init_iter_find_icase (&iter, options, key)) { /* Special case, MONGOC_URI_W == "any non-int" is not overridden * by later values. 
*/ - if (!strcmp (lkey, MONGOC_URI_W) && (opt = bson_iter_utf8_unsafe (&iter, &opt_len))) { - strtol (opt, &opt_end, 10); - if (*opt_end != '\0') { - ret = true; - goto CLEANUP; - } + size_t opt_len; + if (mstr_latin_casecmp (key, ==, mstr_cstring (MONGOC_URI_W)) && + mlib_i64_parse (mstr_cstring (bson_iter_utf8_unsafe (&iter, &opt_len)), NULL)) { + // Value is a "w", and is not a valid integer, but we already have a valid "w" + // value, so don't overwrite it + ret = true; + goto done; } /* Initial DNS Seedlist Discovery Spec: "Client MUST use options @@ -927,32 +858,34 @@ mongoc_uri_split_option (mongoc_uri_t *uri, bson_t *options, const char *str, bo * through TXT records." So, do NOT override existing options with TXT * options. */ if (from_dns) { - if (0 == strcmp (lkey, MONGOC_URI_AUTHSOURCE)) { + if (mstr_latin_casecmp (key, ==, mstr_cstring (MONGOC_URI_AUTHSOURCE))) { // Treat `authSource` as a special case. A server may support authentication with multiple mechanisms. // MONGODB-X509 requires authSource=$external. SCRAM-SHA-256 requires authSource=admin. // Only log a trace message since this may be expected. - TRACE ("Ignoring URI option \"%s\" from TXT record \"%s\". Option is already present in URI", key, str); + TRACE ("Ignoring URI option \"%.*s\" from TXT record \"%.*s\". Option is already present in URI", + MSTR_FMT (key), + MSTR_FMT (str)); } else { - MONGOC_WARNING ( - "Ignoring URI option \"%s\" from TXT record \"%s\". Option is already present in URI", key, str); + MONGOC_WARNING ("Ignoring URI option \"%.*s\" from TXT record \"%.*s\". 
Option is already present in URI", + MSTR_FMT (key), + MSTR_FMT (str)); } ret = true; - goto CLEANUP; + goto done; } - MONGOC_WARNING ("Overwriting previously provided value for '%s'", key); + MONGOC_WARNING ("Overwriting previously provided value for '%.*s'", MSTR_FMT (key)); } - if (!(strcmp (lkey, MONGOC_URI_REPLICASET)) && *value == '\0') { - MONGOC_URI_ERROR (error, "Value for URI option \"%s\" cannot be empty string", lkey); - goto CLEANUP; + // Reject replicaSet="" + if (mstr_latin_casecmp (key, ==, mstr_cstring (MONGOC_URI_REPLICASET)) && strlen (value) == 0) { + MONGOC_URI_ERROR (error, "Value for URI option \"%.*s\" cannot be empty string", MSTR_FMT (key)); + goto done; } - mongoc_uri_bson_append_or_replace_key (options, lkey, value); + _bson_upsert_utf8_icase (options, key, value); ret = true; -CLEANUP: - bson_free (key); - bson_free (lkey); +done: bson_free (value); return ret; @@ -983,7 +916,7 @@ mongoc_uri_options_validate_names (const bson_t *a, const bson_t *b, bson_error_ value = bson_iter_utf8_unsafe (&key_iter, &value_len); canon = mongoc_uri_canonicalize_option (key); - if (key == canon) { + if (mstr_latin_casecmp (mstr_cstring (key), ==, mstr_cstring (canon))) { /* Canonical form, no point checking `b`. */ continue; } @@ -991,7 +924,7 @@ mongoc_uri_options_validate_names (const bson_t *a, const bson_t *b, bson_error_ /* Check for a conflict in `a`. */ if (bson_iter_init_find (&canon_iter, a, canon)) { cval = bson_iter_utf8_unsafe (&canon_iter, &cval_len); - if ((value_len != cval_len) || strcmp (value, cval)) { + if (mstr_cmp (mstr_cstring (cval), !=, mstr_cstring (value))) { goto HANDLE_CONFLICT; } } @@ -999,7 +932,7 @@ mongoc_uri_options_validate_names (const bson_t *a, const bson_t *b, bson_error_ /* Check for a conflict in `b`. 
*/ if (bson_iter_init_find (&canon_iter, b, canon)) { cval = bson_iter_utf8_unsafe (&canon_iter, &cval_len); - if ((value_len != cval_len) || strcmp (value, cval)) { + if (mstr_cmp (mstr_cstring (cval), !=, mstr_cstring (value))) { goto HANDLE_CONFLICT; } } @@ -1033,8 +966,6 @@ static bool mongoc_uri_apply_options (mongoc_uri_t *uri, const bson_t *options, bool from_dns, bson_error_t *error) { bson_iter_t iter; - int32_t v_int; - int64_t v_int64; const char *key = NULL; const char *canon = NULL; const char *value = NULL; @@ -1048,18 +979,19 @@ mongoc_uri_apply_options (mongoc_uri_t *uri, const bson_t *options, bool from_dn value = bson_iter_utf8_unsafe (&iter, &value_len); /* Keep a record of how the option was originally presented. */ - mongoc_uri_bson_append_or_replace_key (&uri->raw, key, value); + _bson_upsert_utf8_icase (&uri->raw, mstr_cstring (key), value); /* This check precedes mongoc_uri_option_is_int32 as all 64-bit values are * also recognised as 32-bit ints. */ if (mongoc_uri_option_is_int64 (key)) { if (0 < strlen (value)) { - if (!_mongoc_uri_parse_int64 (key, value, &v_int64)) { + int64_t i64 = 42424242; + if (mlib_i64_parse (mstr_cstring (value), &i64)) { goto UNSUPPORTED_VALUE; } - if (!_mongoc_uri_set_option_as_int64_with_error (uri, canon, v_int64, error)) { + if (!_mongoc_uri_set_option_as_int64_with_error (uri, canon, i64, error)) { return false; } } else { @@ -1067,26 +999,27 @@ mongoc_uri_apply_options (mongoc_uri_t *uri, const bson_t *options, bool from_dn } } else if (mongoc_uri_option_is_int32 (key)) { if (0 < strlen (value)) { - if (!mongoc_uri_parse_int32 (key, value, &v_int)) { + int32_t i32 = 42424242; + if (mlib_i32_parse (mstr_cstring (value), &i32)) { goto UNSUPPORTED_VALUE; } - if (!_mongoc_uri_set_option_as_int32_with_error (uri, canon, v_int, error)) { + if (!_mongoc_uri_set_option_as_int32_with_error (uri, canon, i32, error)) { return false; } } else { MONGOC_WARNING ("Empty value provided for \"%s\"", key); } } else if 
(!strcmp (key, MONGOC_URI_W)) { - if (*value == '-' || isdigit (*value)) { - v_int = (int) strtol (value, NULL, 10); - _mongoc_uri_set_option_as_int32 (uri, MONGOC_URI_W, v_int); + int32_t i32 = 42424242; + if (!mlib_i32_parse (mstr_cstring (value), 10, &i32)) { + // A valid integer 'w' value. + _mongoc_uri_set_option_as_int32 (uri, MONGOC_URI_W, i32); } else if (0 == strcasecmp (value, "majority")) { - mongoc_uri_bson_append_or_replace_key (&uri->options, MONGOC_URI_W, "majority"); + _bson_upsert_utf8_icase (&uri->options, mstr_cstring (MONGOC_URI_W), "majority"); } else if (*value) { - mongoc_uri_bson_append_or_replace_key (&uri->options, MONGOC_URI_W, value); + _bson_upsert_utf8_icase (&uri->options, mstr_cstring (MONGOC_URI_W), value); } - } else if (mongoc_uri_option_is_bool (key)) { if (0 < strlen (value)) { if (0 == strcasecmp (value, "true")) { @@ -1132,7 +1065,7 @@ mongoc_uri_apply_options (mongoc_uri_t *uri, const bson_t *options, bool from_dn if (bson_has_field (&uri->credentials, key)) { HANDLE_DUPE (); } - mongoc_uri_bson_append_or_replace_key (&uri->credentials, canon, value); + _bson_upsert_utf8_icase (&uri->credentials, mstr_cstring (canon), value); } else if (!strcmp (key, MONGOC_URI_READCONCERNLEVEL)) { if (!mongoc_read_concern_is_default (uri->read_concern)) { @@ -1151,7 +1084,7 @@ mongoc_uri_apply_options (mongoc_uri_t *uri, const bson_t *options, bool from_dn MONGOC_WARNING (MONGOC_URI_GSSAPISERVICENAME " is deprecated, use " MONGOC_URI_AUTHMECHANISMPROPERTIES " with SERVICE_NAME instead"); - if (!mongoc_uri_parse_auth_mechanism_properties (uri, tmp)) { + if (!_parse_and_set_auth_mechanism_properties (uri, tmp)) { bson_free (tmp); goto UNSUPPORTED_VALUE; } @@ -1162,13 +1095,13 @@ mongoc_uri_apply_options (mongoc_uri_t *uri, const bson_t *options, bool from_dn if (!mongoc_uri_check_srv_service_name (uri, value)) { goto UNSUPPORTED_VALUE; } - mongoc_uri_bson_append_or_replace_key (&uri->options, canon, value); + _bson_upsert_utf8_icase 
(&uri->options, mstr_cstring (canon), value); } else if (!strcmp (key, MONGOC_URI_AUTHMECHANISMPROPERTIES)) { if (bson_has_field (&uri->credentials, key)) { HANDLE_DUPE (); } - if (!mongoc_uri_parse_auth_mechanism_properties (uri, value)) { + if (!_parse_and_set_auth_mechanism_properties (uri, value)) { goto UNSUPPORTED_VALUE; } @@ -1192,7 +1125,7 @@ mongoc_uri_apply_options (mongoc_uri_t *uri, const bson_t *options, bool from_dn } } else if (mongoc_uri_option_is_utf8 (key)) { - mongoc_uri_bson_append_or_replace_key (&uri->options, canon, value); + _bson_upsert_utf8_icase (&uri->options, mstr_cstring (canon), value); } else { /* @@ -1221,26 +1154,16 @@ mongoc_uri_apply_options (mongoc_uri_t *uri, const bson_t *options, bool from_dn * to their appropriate type and stored in uri->options. */ bool -mongoc_uri_parse_options (mongoc_uri_t *uri, const char *str, bool from_dns, bson_error_t *error) +_mongoc_uri_apply_query_string (mongoc_uri_t *uri, mstr_view remain, bool from_dns, bson_error_t *error) { - bson_t options; - const char *end_option; - char *option; - - bson_init (&options); - while ((option = scan_to_unichar (str, '&', "", &end_option))) { - if (!mongoc_uri_split_option (uri, &options, option, from_dns, error)) { - bson_free (option); + bson_t options = BSON_INITIALIZER; + for (; remain.len;) { + mstr_view entry; + mstr_split_around (remain, mstr_cstring ("&"), &entry, &remain); + if (!_handle_pct_uri_query_param (uri, &options, entry, from_dns, error)) { bson_destroy (&options); return false; } - bson_free (option); - str = end_option + 1; - } - - if (*str && !mongoc_uri_split_option (uri, &options, str, from_dns, error)) { - bson_destroy (&options); - return false; } /* Walk both sides of this map to handle each ordering: @@ -1892,168 +1815,211 @@ mongoc_uri_finalize_directconnection (mongoc_uri_t *uri, bson_error_t *error) return true; } +/** + * @brief Parse the authority component of the URI string. 
This is the part following + * "://" until the path, query, or fragment + * + * @param uri The URI to be updated + * @param authority The full authority string to be parsed + */ static bool -mongoc_uri_parse_before_slash (mongoc_uri_t *uri, const char *before_slash, bson_error_t *error) -{ - char *userpass; - const char *hosts; - - userpass = scan_to_unichar (before_slash, '@', "", &hosts); - if (userpass) { - if (!mongoc_uri_parse_userpass (uri, userpass, error)) { - goto error; - } - - hosts++; /* advance past "@" */ - if (*hosts == '@') { - /* special case: "mongodb://alice@@localhost" */ - MONGOC_URI_ERROR (error, "Invalid username or password. %s", escape_instructions); - goto error; +_parse_authority (mongoc_uri_t *uri, const mstr_view authority, bson_error_t *error) +{ + // Split around "@" if there is a userinfo + mstr_view userinfo, hostinfo; + if (mstr_split_around (authority, AT, &userinfo, &hostinfo)) { + // We have userinfo. Parse that first + if (!_uri_parse_userinfo (uri, userinfo, error)) { + // Fail to parse userinfo. Fail the full authority. + return false; } + // `hostinfo` now contains the authority part following the first "@" } else { - hosts = before_slash; + // No userinfo. The hostinfo is the entire string + hostinfo = authority; + } + + // Don't allow the host list to start with "@" + if (mstr_starts_with (hostinfo, AT)) { + /* special case: "mongodb://alice@@localhost" */ + MONGOC_URI_ERROR (error, "Invalid username or password. 
%s", escape_instructions); + return false; } if (uri->is_srv) { - if (!mongoc_uri_parse_srv (uri, hosts, error)) { - goto error; + // Parse as an SRV URI + if (!_parse_srv_hostname (uri, hostinfo, error)) { + return false; } } else { - if (!mongoc_uri_parse_hosts (uri, hosts)) { - MONGOC_URI_ERROR (error, "%s", "Invalid host string in URI"); - goto error; + // Parse a comma-separated host list + if (!_parse_hosts_csv (uri, hostinfo, error)) { + return false; } } - bson_free (userpass); return true; - -error: - bson_free (userpass); - return false; } - +/** + * @brief The elements of a decomposed URI string + * + * This isn't strictly conformant to any WWW spec, because our URI strings are weird, + * but the URI components correspond to the same elements in a normal URL or URI + */ +typedef struct { + /// The scheme of the URI, which precedes the "://" substring + mstr_view scheme; + /// The authority element, which includes the userinfo and the host specifier(s) + mstr_view authority; + /// The userinfo for the URI. If the URI has no userinfo, this is null + mstr_view userinfo; + /// The host specifier in the URI + mstr_view hosts; + /// The path string, including the leading "/" + mstr_view path; + /// The query string, including the leading "?" + mstr_view query; + /// The fragment element, including the leading "#" + mstr_view fragment; +} uri_parts; + +/** + * @brief Decompose a URI string into its constituent components + * + * @param parts Pointer to struct that receives each URI component + * @param uri The URI string that is being inspected + * @return true If the decomposition was successful + * @return false Otherwise + * + * This does not allocate any memory or update any data related to `mongoc_uri_t`, + * it is purely a parsing operation. The string views attached to `*parts` + * are views within the `uri` string. + * + * This function does not handle percent-encoding of elements.
+ */ static bool -mongoc_uri_parse (mongoc_uri_t *uri, const char *str, bson_error_t *error) +_decompose_uri_string (uri_parts *parts, mstr_view const uri, bson_error_t *error) { - BSON_ASSERT_PARAM (uri); - BSON_ASSERT_PARAM (str); + BSON_ASSERT_PARAM (parts); - const size_t str_len = strlen (str); + // Clear out + *parts = (uri_parts) {{0}}; - if (!bson_utf8_validate (str, str_len, false /* allow_null */)) { + // Check that the URI string is valid UTF-8, otherwise we'll refuse to parse it + if (!bson_utf8_validate (uri.data, uri.len, false /* allow_null */)) { MONGOC_URI_ERROR (error, "%s", "Invalid UTF-8 in URI"); return false; } - // Save for later. - const char *const str_end = str + str_len; + // Trim down the string as we read from left to right + mstr_view remain = uri; - // Parse and remove scheme and its delimiter. - // e.g. "mongodb://user:pass@host1:27017,host2:27018/database?key1=value1&key2=value2" - // ~~~~~~~~~~ - if (!mongoc_uri_parse_scheme (uri, str, &str)) { - MONGOC_URI_ERROR (error, "%s", "Invalid URI Schema, expecting 'mongodb://' or 'mongodb+srv://'"); + // * remain = "foo://bar@baz:1234/path?query#fragment" + // Grab the scheme, which is the part preceding "://" + if (!mstr_split_around (remain, mstr_cstring ("://"), &parts->scheme, &remain)) { + MONGOC_URI_ERROR (error, "%s", "Invalid URI, no scheme part specified"); return false; } - // str -> "user:pass@host1:27017,host2:27018/database?key1=value1&key2=value2" - - // From this point forward, use this cursor to find the split between "userhosts" and "dbopts". - const char *cursor = str; - // Remove userinfo and its delimiter. - // e.g. "user:pass@host1:27017,host2:27018/database?key1=value1&key2=value2" - // ~~~~~~~~~~ + // * remain = "bar@baz:1234/path?query#fragment" + // Only ':' is permitted among RFC-3986 gen-delims (":/?#[]@") in userinfo. 
+ // However, continue supporting these characters for backward compatibility, as permitted by the Connection + // String spec: for backwards-compatibility reasons, drivers MAY allow reserved characters other than "@" and + // ":" to be present in user information without percent-encoding. + // To handle this, we will start scanning for the authority terminator beginning + // after a possible "@" symbol in the URI. If no "@" is present, we don't need to + // do anything different. { - const char *tmp; - - // Only ':' is permitted among RFC-3986 gen-delims (":/?#[]@") in userinfo. - // However, continue supporting these characters for backward compatibility, as permitted by the Connection - // String spec: for backwards-compatibility reasons, drivers MAY allow reserved characters other than "@" and - // ":" to be present in user information without percent-encoding. - char *userinfo = scan_to_unichar (cursor, '@', "", &tmp); - - if (userinfo) { - cursor = tmp + 1; // Consume userinfo delimiter. 
- bson_free (userinfo); + size_t userinfo_end_pos = mstr_find (remain, AT); + if (userinfo_end_pos == SIZE_MAX) { + // There is no userinfo, so we don't need to do anything special + userinfo_end_pos = 0; + } + // Find the position of the first character that terminates the authority element + const size_t term_pos = mstr_find_first_of (remain, mstr_cstring ("/?#"), userinfo_end_pos); + mstr_split_at (remain, term_pos, &parts->authority, &remain); + + // Now we should split the authority between the userinfo and the hosts + { + const size_t at_pos = mstr_find (parts->authority, AT); + if (at_pos != SIZE_MAX) { + // We have a userinfo component + mstr_split_at (parts->authority, at_pos, 1, &parts->userinfo, &parts->hosts); + } else { + // We have no userinfo, so the authority string is just the host list + parts->hosts = parts->authority; + } + } } } - // cursor -> "host1:27017,host2:27018/database?key1=value1&key2=value2" - - // Find either the optional auth database delimiter or the query delimiter. - // e.g. "host1:27017,host2:27018/database?key1=value1&key2=value2" - // ^ - // e.g. "host1:27017,host2:27018?key1=value1&key2=value2" - // ^ - { - const char *tmp; - - // Only ':', '[', and ']' are permitted among RFC-3986 gen-delims (":/?#[]@") in hostinfo. - const char *const terminators = "/?#@"; - - char *hostinfo; - // Optional auth delimiter is present. - if ((hostinfo = scan_to_unichar (cursor, '/', terminators, &tmp))) { - cursor = tmp; // Include the delimiter.
- bson_free (hostinfo); - } + // * remain = "/path?query#fragment" (Each following component is optional, but this is the proper order) + const size_t path_end_pos = mstr_find_first_of (remain, mstr_cstring ("?#")); + mstr_split_at (remain, path_end_pos, &parts->path, &remain); + // * remain = "?query#fragment" (Each following component is optional, but this is the proper order) + const size_t hash_pos = mstr_find_first_of (remain, mstr_cstring ("#")); + mstr_split_at (remain, hash_pos, &parts->query, &remain); + // * remain = "#fragment" + parts->fragment = remain; + return true; +} - // Query delimiter is present. - else if ((hostinfo = scan_to_unichar (cursor, '?', terminators, &tmp))) { - cursor = tmp; // Include the delimiter. - bson_free (hostinfo); - } +/** + * @brief Parse the given URI C string into the URI structure + * + * @param uri Pointer to an initialized empty URI object to be updated + * @param str Pointer to a C string for the URI string itself + * @return true If the parse operation is successful, and `*uri` is updated + * @return false Otherwise, and `*uri` contents are unspecified + */ +static bool +mongoc_uri_parse (mongoc_uri_t *uri, const char *str, bson_error_t *error) +{ + BSON_ASSERT_PARAM (uri); + BSON_ASSERT_PARAM (str); - // Neither delimiter is present. Entire rest of string is part of hostinfo. - else { - cursor = str_end; // Jump to end of string. - BSON_ASSERT (*cursor == '\0'); - } + // Split the URI into its parts + mstr_view remain = mstr_cstring (str); + uri_parts parts; + if (!_decompose_uri_string (&parts, remain, error)) { + return false; } - // cursor -> "/database?key1=value1&key2=value2" - // Parse "userhosts". e.g. "user:pass@host1:27017,host2:27018" - { - char *const userhosts = bson_strndup (str, (size_t) (cursor - str)); - const bool ret = mongoc_uri_parse_before_slash (uri, userhosts, error); - bson_free (userhosts); - if (!ret) { - return false; - } + // Don't allow a fragment specifier.
We don't support that + if (parts.fragment.len) { + MONGOC_URI_ERROR (error, + "Invalid URI string \"%s\": URIs cannot have a fragment element (got '%.*s')", + str, + MSTR_FMT (parts.fragment)); + return false; } - // Parse "dbopts". e.g. "/database?key1=value1&key2=value2" - if (*cursor != '\0') { - BSON_ASSERT (*cursor == '/' || *cursor == '?'); - - // Parse the auth database. - if (*cursor == '/') { - ++cursor; // Consume the delimiter. + // Detect whether we are a "mongodb" or "mongodb+srv" URI + if (mstr_cmp (parts.scheme, ==, mstr_cstring ("mongodb"))) { + uri->is_srv = false; + } else if (mstr_cmp (parts.scheme, ==, mstr_cstring ("mongodb+srv"))) { + uri->is_srv = true; + } else { + MONGOC_URI_ERROR (error, + "Invalid URI scheme \"%.*s://\". Expected one of \"mongodb://\" or \"mongodb+srv://\"", + MSTR_FMT (parts.scheme)); + return false; + } - // No auth database may be present even if the delimiter is present. - // e.g. "mongodb://localhost:27017/" - if (*cursor != '\0') { - if (!mongoc_uri_parse_database (uri, cursor, &cursor)) { - MONGOC_URI_ERROR (error, "%s", "Invalid database name in URI"); - return false; - } - } - } + // Handle the authority, including the userinfo and host specifier(s) + if (!_parse_authority (uri, parts.authority, error)) { + return false; + } - // Parse the query options. - if (*cursor == '?') { - ++cursor; // Consume the delimiter. + // If we have a path, parse that as the auth database + if (!_parse_path (uri, parts.path, error)) { + return false; + } - // No options may be present even if the delimiter is present. - // e.g. "mongodb://localhost:27017?" 
- if (*cursor != '\0') { - if (!mongoc_uri_parse_options (uri, cursor, false /* from DNS */, error)) { - return false; - } - } - } + // If we have a query, parse that as the URI settings + if (parts.query.len && + !_mongoc_uri_apply_query_string (uri, mstr_substr (parts.query, 1), false /* from DNS */, error)) { + return false; } return mongoc_uri_finalize (uri, error); @@ -2119,7 +2085,7 @@ mongoc_uri_set_auth_mechanism (mongoc_uri_t *uri, const char *value) return false; } - mongoc_uri_bson_append_or_replace_key (&uri->credentials, MONGOC_URI_AUTHMECHANISM, value); + _bson_upsert_utf8_icase (&uri->credentials, mstr_cstring (MONGOC_URI_AUTHMECHANISM), value); return true; } @@ -2548,7 +2514,7 @@ mongoc_uri_set_auth_source (mongoc_uri_t *uri, const char *value) return false; } - mongoc_uri_bson_append_or_replace_key (&uri->credentials, MONGOC_URI_AUTHSOURCE, value); + _bson_upsert_utf8_icase (&uri->credentials, mstr_cstring (MONGOC_URI_AUTHSOURCE), value); return true; } @@ -2576,7 +2542,7 @@ mongoc_uri_set_appname (mongoc_uri_t *uri, const char *value) return false; } - mongoc_uri_bson_append_or_replace_key (&uri->options, MONGOC_URI_APPNAME, value); + _bson_upsert_utf8_icase (&uri->options, mstr_cstring (MONGOC_URI_APPNAME), value); return true; } @@ -2584,29 +2550,25 @@ mongoc_uri_set_appname (mongoc_uri_t *uri, const char *value) bool mongoc_uri_set_compressors (mongoc_uri_t *uri, const char *value) { - const char *end_compressor; - char *entry; + bson_reinit (&uri->compressors); - bson_destroy (&uri->compressors); - bson_init (&uri->compressors); + if (!value) { + // Just clear the compressors + return true; + } - if (value && !bson_utf8_validate (value, strlen (value), false)) { + if (!bson_utf8_validate (value, strlen (value), false)) { + // Invalid UTF-8 in the string return false; } - while ((entry = scan_to_unichar (value, ',', "", &end_compressor))) { + + for (mstr_view remain = mstr_cstring (value); remain.len;) { + mstr_view entry; + mstr_split_around 
(remain, COMMA, &entry, &remain); if (mongoc_compressor_supported (entry)) { - mongoc_uri_bson_append_or_replace_key (&uri->compressors, entry, "yes"); - } else { - MONGOC_WARNING ("Unsupported compressor: '%s'", entry); - } - value = end_compressor + 1; - bson_free (entry); - } - if (value) { - if (mongoc_compressor_supported (value)) { - mongoc_uri_bson_append_or_replace_key (&uri->compressors, value, "yes"); + _bson_upsert_utf8_icase (&uri->compressors, entry, "yes"); } else { - MONGOC_WARNING ("Unsupported compressor: '%s'", value); + MONGOC_WARNING ("Unsupported compressor: '%.*s'", MSTR_FMT (entry)); } } @@ -2759,68 +2721,12 @@ mongoc_uri_get_string (const mongoc_uri_t *uri) char * mongoc_uri_unescape (const char *escaped_string) { - bson_unichar_t c; - unsigned int hex = 0; - const char *ptr; - const char *end; - size_t len; - bool unescape_occurred = false; - - BSON_ASSERT (escaped_string); - - len = strlen (escaped_string); - - /* - * Double check that this is a UTF-8 valid string. Bail out if necessary. 
- */ - if (!bson_utf8_validate (escaped_string, len, false)) { - MONGOC_WARNING ("%s(): escaped_string contains invalid UTF-8", BSON_FUNC); - return NULL; - } - - ptr = escaped_string; - end = ptr + len; - - mcommon_string_append_t append; - mcommon_string_new_with_capacity_as_append (&append, len); - - for (; *ptr; ptr = bson_utf8_next_char (ptr)) { - c = bson_utf8_get_char (ptr); - switch (c) { - case '%': - if (((end - ptr) < 2) || !isxdigit (ptr[1]) || !isxdigit (ptr[2]) || -#ifdef _MSC_VER - (1 != sscanf_s (&ptr[1], "%02x", &hex)) -#else - (1 != sscanf (&ptr[1], "%02x", &hex)) -#endif - || 0 == hex) { - mcommon_string_from_append_destroy (&append); - MONGOC_WARNING ("Invalid %% escape sequence"); - return NULL; - } - - // This isn't guaranteed to be valid UTF-8, we check again below - char byte = (char) hex; - mcommon_string_append_bytes (&append, &byte, 1); - ptr += 2; - unescape_occurred = true; - break; - default: - mcommon_string_append_unichar (&append, c); - break; - } - } - - /* Check that after unescaping, it is still valid UTF-8 */ - if (unescape_occurred && - !bson_utf8_validate (mcommon_str_from_append (&append), mcommon_strlen_from_append (&append), false)) { - MONGOC_WARNING ("Invalid %% escape sequence: unescaped string contains invalid UTF-8"); - mcommon_string_from_append_destroy (&append); - return NULL; + bson_error_t error; + char *r = _strdup_pct_decode (mstr_cstring (escaped_string), &error); + if (!r) { + MONGOC_WARNING ("%s(): Invalid %% escape sequence: %s", BSON_FUNC, error.message); } - - return mcommon_string_from_append_destroy_with_steal (&append); + return r; } @@ -2930,7 +2836,7 @@ mongoc_uri_set_server_monitoring_mode (mongoc_uri_t *uri, const char *value) return false; } - mongoc_uri_bson_append_or_replace_key (&uri->options, MONGOC_URI_SERVERMONITORINGMODE, value); + _bson_upsert_utf8_icase (&uri->options, mstr_cstring (MONGOC_URI_SERVERMONITORINGMODE), value); return true; } @@ -3454,7 +3360,6 @@ mongoc_uri_set_option_as_utf8 
(mongoc_uri_t *uri, const char *option_orig, const { const char *option; size_t len; - char *option_lowercase = NULL; option = mongoc_uri_canonicalize_option (option_orig); BSON_ASSERT (option); @@ -3473,9 +3378,7 @@ mongoc_uri_set_option_as_utf8 (mongoc_uri_t *uri, const char *option_orig, const } else if (!bson_strcasecmp (option, MONGOC_URI_SERVERMONITORINGMODE)) { return mongoc_uri_set_server_monitoring_mode (uri, value); } else { - option_lowercase = lowercase_str_new (option); - mongoc_uri_bson_append_or_replace_key (&uri->options, option_lowercase, value); - bson_free (option_lowercase); + _bson_upsert_utf8_icase (&uri->options, mstr_cstring (option), value); } return true; diff --git a/src/libmongoc/src/mongoc/mongoc-util-private.h b/src/libmongoc/src/mongoc/mongoc-util-private.h index 84e007396ca..81a17691c16 100644 --- a/src/libmongoc/src/mongoc/mongoc-util-private.h +++ b/src/libmongoc/src/mongoc/mongoc-util-private.h @@ -21,6 +21,7 @@ #include #include +#include #ifdef BSON_HAVE_STRINGS_H #include @@ -103,8 +104,16 @@ mongoc_lowercase (const char *src, char *buf /* OUT */); void mongoc_lowercase_inplace (char *src); +/** + * @brief Parse a network port number + * + * @param spelling The decimal spelling of the port number + * @param out The port number to be updated + * @return true If the parse is successful + * @return false Otherwise + */ bool -mongoc_parse_port (uint16_t *port, const char *str); +_mongoc_parse_port (mstr_view spelling, uint16_t *out, bson_error_t *error); void _mongoc_bson_array_add_label (bson_t *bson, const char *label); diff --git a/src/libmongoc/src/mongoc/mongoc-util.c b/src/libmongoc/src/mongoc/mongoc-util.c index 11fcb182e33..abd4032e324 100644 --- a/src/libmongoc/src/mongoc/mongoc-util.c +++ b/src/libmongoc/src/mongoc/mongoc-util.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -546,22 +547,40 @@ mongoc_lowercase_inplace (char *src) } bool -mongoc_parse_port (uint16_t *port, const char *str) 
+_mongoc_parse_port (mstr_view spelling, uint16_t *out, bson_error_t *error) { - unsigned long ul_port; + bson_error_reset (error); + // Parse a strict natural number + uint64_t u = 0; + int ec = mlib_nat64_parse (spelling, 10, &u); + + if (!ec && u == 0) { + // Successful parse, but the value is zero + bson_set_error (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Port number cannot be zero"); + return false; + } - ul_port = strtoul (str, NULL, 10); + if (ec == EINVAL) { + // The given string is just not a valid integer + bson_set_error ( + error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Port string is not a valid integer"); + return false; + } - if (ul_port == 0 || ul_port > UINT16_MAX) { - /* Parse error or port number out of range. mongod prohibits port 0. */ + if (ec == ERANGE || mlib_narrow (out, u)) { + // The value is out-of range for u64, or out-of range for u16 + bson_set_error (error, + MONGOC_ERROR_COMMAND, + MONGOC_ERROR_COMMAND_INVALID_ARG, + "Port number is out-of-range for a 16-bit integer"); return false; } - *port = (uint16_t) ul_port; + // No other errors are possible from nat64_parse + mlib_check (ec, eq, 0); return true; } - /*-------------------------------------------------------------------------- * * _mongoc_bson_array_add_label -- diff --git a/src/libmongoc/src/mongoc/mongoc-write-concern.c b/src/libmongoc/src/mongoc/mongoc-write-concern.c index 6a1fec1a42a..46a7c44c769 100644 --- a/src/libmongoc/src/mongoc/mongoc-write-concern.c +++ b/src/libmongoc/src/mongoc/mongoc-write-concern.c @@ -14,19 +14,16 @@ * limitations under the License. 
*/ - +#include // BEGIN_IGNORE_DEPRECATIONS #include #include #include -#include #include -#include // BEGIN_IGNORE_DEPRECATIONS - +#include static void _mongoc_write_concern_freeze (mongoc_write_concern_t *write_concern); - /** * mongoc_write_concern_new: * @@ -50,7 +47,6 @@ mongoc_write_concern_new (void) return write_concern; } - mongoc_write_concern_t * mongoc_write_concern_copy (const mongoc_write_concern_t *write_concern) { @@ -69,7 +65,6 @@ mongoc_write_concern_copy (const mongoc_write_concern_t *write_concern) return ret; } - /** * mongoc_write_concern_destroy: * @write_concern: A mongoc_write_concern_t. @@ -86,7 +81,6 @@ mongoc_write_concern_destroy (mongoc_write_concern_t *write_concern) } } - bool mongoc_write_concern_get_journal (const mongoc_write_concern_t *write_concern) { @@ -94,7 +88,6 @@ mongoc_write_concern_get_journal (const mongoc_write_concern_t *write_concern) return (write_concern->journal == true); } - bool mongoc_write_concern_journal_is_set (const mongoc_write_concern_t *write_concern) { @@ -102,7 +95,6 @@ mongoc_write_concern_journal_is_set (const mongoc_write_concern_t *write_concern return (write_concern->journal != MONGOC_WRITE_CONCERN_JOURNAL_DEFAULT); } - /** * mongoc_write_concern_set_journal: * @write_concern: A mongoc_write_concern_t. 
@@ -121,7 +113,6 @@ mongoc_write_concern_set_journal (mongoc_write_concern_t *write_concern, bool jo write_concern->frozen = false; } - int32_t mongoc_write_concern_get_w (const mongoc_write_concern_t *write_concern) { @@ -129,7 +120,6 @@ mongoc_write_concern_get_w (const mongoc_write_concern_t *write_concern) return write_concern->w; } - /** * mongoc_write_concern_set_w: * @w: The number of nodes for write or MONGOC_WRITE_CONCERN_W_MAJORITY @@ -154,14 +144,12 @@ mongoc_write_concern_set_w (mongoc_write_concern_t *write_concern, int32_t w) write_concern->frozen = false; } - int32_t mongoc_write_concern_get_wtimeout (const mongoc_write_concern_t *write_concern) { return (int32_t) mongoc_write_concern_get_wtimeout_int64 (write_concern); } - int64_t mongoc_write_concern_get_wtimeout_int64 (const mongoc_write_concern_t *write_concern) { @@ -169,14 +157,12 @@ mongoc_write_concern_get_wtimeout_int64 (const mongoc_write_concern_t *write_con return write_concern->wtimeout; } - void mongoc_write_concern_set_wtimeout (mongoc_write_concern_t *write_concern, int32_t wtimeout_msec) { mongoc_write_concern_set_wtimeout_int64 (write_concern, (int64_t) wtimeout_msec); } - void mongoc_write_concern_set_wtimeout_int64 (mongoc_write_concern_t *write_concern, int64_t wtimeout_msec) { @@ -191,7 +177,6 @@ mongoc_write_concern_set_wtimeout_int64 (mongoc_write_concern_t *write_concern, write_concern->frozen = false; } - bool mongoc_write_concern_get_wmajority (const mongoc_write_concern_t *write_concern) { @@ -199,7 +184,6 @@ mongoc_write_concern_get_wmajority (const mongoc_write_concern_t *write_concern) return (write_concern->w == MONGOC_WRITE_CONCERN_W_MAJORITY); } - /** * mongoc_write_concern_set_wmajority: * @write_concern: A mongoc_write_concern_t. 
@@ -226,7 +210,6 @@ mongoc_write_concern_set_wmajority (mongoc_write_concern_t *write_concern, int32 } } - const char * mongoc_write_concern_get_wtag (const mongoc_write_concern_t *write_concern) { @@ -239,7 +222,6 @@ mongoc_write_concern_get_wtag (const mongoc_write_concern_t *write_concern) return NULL; } - void mongoc_write_concern_set_wtag (mongoc_write_concern_t *write_concern, const char *wtag) { @@ -284,7 +266,6 @@ mongoc_write_concern_is_default (const mongoc_write_concern_t *write_concern) return !write_concern || write_concern->is_default; } - /** * mongoc_write_concern_freeze: * @write_concern: A mongoc_write_concern_t. @@ -327,7 +308,6 @@ _mongoc_write_concern_freeze (mongoc_write_concern_t *write_concern) } } - /** * mongoc_write_concern_is_acknowledged: * @concern: (in): A mongoc_write_concern_t. @@ -347,7 +327,6 @@ mongoc_write_concern_is_acknowledged (const mongoc_write_concern_t *write_concer return true; } - /** * mongoc_write_concern_is_valid: * @write_concern: (in): A mongoc_write_concern_t. @@ -377,7 +356,6 @@ mongoc_write_concern_is_valid (const mongoc_write_concern_t *write_concern) return true; } - static bool _mongoc_write_concern_validate (const mongoc_write_concern_t *write_concern, bson_error_t *error) { @@ -388,7 +366,6 @@ _mongoc_write_concern_validate (const mongoc_write_concern_t *write_concern, bso return true; } - /** * _mongoc_parse_wc_err: * @doc: (in): A bson document. @@ -421,7 +398,6 @@ _mongoc_parse_wc_err (const bson_t *doc, bson_error_t *error) return false; } - /** * mongoc_write_concern_append: * @write_concern: (in): A mongoc_write_concern_t. 
diff --git a/src/libmongoc/tests/TestSuite.c b/src/libmongoc/tests/TestSuite.c index b861f6e09f4..6bc5ac38887 100644 --- a/src/libmongoc/tests/TestSuite.c +++ b/src/libmongoc/tests/TestSuite.c @@ -1230,7 +1230,7 @@ test_bulkwriteexception_str (const mongoc_bulkwriteexception_t *bwe) int32_t get_current_connection_count (const char *host_and_port) { - char *uri_str = bson_strdup_printf ("mongodb://%s\n", host_and_port); + char *uri_str = bson_strdup_printf ("mongodb://%s", host_and_port); char *uri_str_with_auth = test_framework_add_user_password_from_env (uri_str); mongoc_client_t *client = mongoc_client_new (uri_str_with_auth); test_framework_set_ssl_opts (client); diff --git a/src/libmongoc/tests/json/connection_uri/additional-nonspec-tests.json b/src/libmongoc/tests/json/connection_uri/additional-nonspec-tests.json index 5cab2eb1433..6907e7bfcff 100644 --- a/src/libmongoc/tests/json/connection_uri/additional-nonspec-tests.json +++ b/src/libmongoc/tests/json/connection_uri/additional-nonspec-tests.json @@ -153,7 +153,7 @@ "description": "Username containing percent encoded multi-byte UTF-8 is valid", "uri": "mongodb://%E2%D8%83", "valid": false, - "warning": true + "warning": false }, { "description": "Valid connection and timeout options are parsed correctly, libmongoc version without maxIdleTimeMS", @@ -186,4 +186,4 @@ } } ] -} +} \ No newline at end of file diff --git a/src/libmongoc/tests/test-mongoc-connection-uri.c b/src/libmongoc/tests/test-mongoc-connection-uri.c index 6151e386b20..d5c96b63928 100644 --- a/src/libmongoc/tests/test-mongoc-connection-uri.c +++ b/src/libmongoc/tests/test-mongoc-connection-uri.c @@ -92,7 +92,7 @@ run_uri_test (const char *uri_string, if (strchr (bson_iter_utf8 (&iter, NULL), '.')) { BSON_ASSERT (!uri); ASSERT_ERROR_CONTAINS ( - error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid database name in URI"); + error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid database specifier \"admin."); 
clear_captured_logs (); return; } diff --git a/src/libmongoc/tests/test-mongoc-max-staleness.c b/src/libmongoc/tests/test-mongoc-max-staleness.c index da7945bc488..d67aa75a86d 100644 --- a/src/libmongoc/tests/test-mongoc-max-staleness.c +++ b/src/libmongoc/tests/test-mongoc-max-staleness.c @@ -84,8 +84,9 @@ test_mongoc_client_max_staleness (void) ASSERT (!test_framework_client_new ( "mongodb://a/?" MONGOC_URI_READPREFERENCE "=secondary&" MONGOC_URI_MAXSTALENESSSECONDS "=10.5", NULL)); - ASSERT_CAPTURED_LOG ( - MONGOC_URI_MAXSTALENESSSECONDS "=10.5", MONGOC_LOG_LEVEL_WARNING, "Invalid " MONGOC_URI_MAXSTALENESSSECONDS); + ASSERT_CAPTURED_LOG (MONGOC_URI_MAXSTALENESSSECONDS "=10.5", + MONGOC_LOG_LEVEL_WARNING, + "Unsupported value for \"maxstalenessseconds\""); capture_logs (false); /* 1 is allowed, it'll be rejected once we begin server selection */ diff --git a/src/libmongoc/tests/test-mongoc-uri.c b/src/libmongoc/tests/test-mongoc-uri.c index 06b63cd6562..d48764272dc 100644 --- a/src/libmongoc/tests/test-mongoc-uri.c +++ b/src/libmongoc/tests/test-mongoc-uri.c @@ -46,6 +46,12 @@ test_mongoc_uri_new (void) ASSERT (!mongoc_uri_new ("mongodb://localhost::27017")); ASSERT (!mongoc_uri_new ("mongodb://localhost,localhost::")); ASSERT (!mongoc_uri_new ("mongodb://local1,local2,local3/d?k")); + // %-encoded chars that are invalid in the database name + ASSERT (!mongoc_uri_new ("mongodb://local1,local2,local3/db%2fname")); // "/" + ASSERT (!mongoc_uri_new ("mongodb://local1,local2,local3/db%20ame")); // " " + ASSERT (!mongoc_uri_new ("mongodb://local1,local2,local3/db%5came")); // "\" + ASSERT (!mongoc_uri_new ("mongodb://local1,local2,local3/db%24ame")); // "$" + ASSERT (!mongoc_uri_new ("mongodb://local1,local2,local3/db%22ame")); // '"' ASSERT (!mongoc_uri_new ("")); ASSERT (!mongoc_uri_new ("mongodb://,localhost:27017")); ASSERT (!mongoc_uri_new ("mongodb://localhost:27017,,b")); @@ -251,19 +257,19 @@ test_mongoc_uri_new (void) capture_logs (true); uri = mongoc_uri_new 
("mongodb://u%ser:pwd@localhost:27017"); ASSERT (!uri); - ASSERT_CAPTURED_LOG ("uri", MONGOC_LOG_LEVEL_WARNING, "Invalid % escape sequence"); + ASSERT_CAPTURED_LOG ("uri", MONGOC_LOG_LEVEL_WARNING, "Invalid %-sequence \"%se\""); capture_logs (false); capture_logs (true); uri = mongoc_uri_new ("mongodb://user:p%wd@localhost:27017"); ASSERT (!uri); - ASSERT_CAPTURED_LOG ("uri", MONGOC_LOG_LEVEL_WARNING, "Invalid % escape sequence"); + ASSERT_CAPTURED_LOG ("uri", MONGOC_LOG_LEVEL_WARNING, "Invalid %-sequence \"%wd\""); capture_logs (false); capture_logs (true); uri = mongoc_uri_new ("mongodb://user:pwd@local% host:27017"); ASSERT (!uri); - ASSERT_CAPTURED_LOG ("uri", MONGOC_LOG_LEVEL_WARNING, "Invalid % escape sequence"); + ASSERT_CAPTURED_LOG ("uri", MONGOC_LOG_LEVEL_WARNING, "Invalid %-sequence \"% h\""); capture_logs (false); uri = mongoc_uri_new ("mongodb://christian%40realm@localhost:27017/?replicaset=%20"); @@ -1415,12 +1421,15 @@ test_mongoc_uri_new_with_error (void) mongoc_uri_destroy (uri); ASSERT (!mongoc_uri_new_with_error ("mongodb://", &error)); - ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid host string in URI"); + ASSERT_ERROR_CONTAINS ( + error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Host list of URI string cannot be empty"); error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongo://localhost", &error)); - ASSERT_ERROR_CONTAINS ( - error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid URI Schema, expecting 'mongodb://'"); + ASSERT_ERROR_CONTAINS (error, + MONGOC_ERROR_COMMAND, + MONGOC_ERROR_COMMAND_INVALID_ARG, + "Invalid URI scheme \"mongo://\". 
Expected one of \"mongodb://\" or \"mongodb+srv://\""); error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongodb://localhost/?readPreference=unknown", &error)); @@ -1449,24 +1458,21 @@ test_mongoc_uri_new_with_error (void) error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongodb://user%p:pass@localhost/", &error)); - ASSERT_ERROR_CONTAINS (error, - MONGOC_ERROR_COMMAND, - MONGOC_ERROR_COMMAND_INVALID_ARG, - "Incorrect URI escapes in username. Percent-encode " - "username and password according to RFC 3986"); + ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Truncated %-sequence \"%p\""); error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongodb://l%oc, alhost/", &error)); - ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid host string in URI"); + ASSERT_ERROR_CONTAINS ( + error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid host specifier \"l%oc\""); error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongodb:///tmp/mongodb.sock", &error)); - ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid host string in URI"); + ASSERT_ERROR_CONTAINS ( + error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Host list of URI string cannot be empty"); error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongodb://localhost/db.na%me", &error)); - ASSERT_ERROR_CONTAINS ( - error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid database name in URI"); + ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid %-sequence \"%me\""); error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongodb://localhost/db?journal=true&w=0", &error)); @@ -1498,15 +1504,18 @@ test_mongoc_uri_new_with_error (void) error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongodb+srv://%", &error)); - ASSERT_ERROR_CONTAINS 
(error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid service name in URI"); + ASSERT_ERROR_CONTAINS ( + error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid SRV service name \"%\" in URI"); error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongodb+srv://x", &error)); - ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid service name in URI"); + ASSERT_ERROR_CONTAINS ( + error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid SRV service name \"x\" in URI"); error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongodb+srv://x.y", &error)); - ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid service name in URI"); + ASSERT_ERROR_CONTAINS ( + error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid SRV service name \"x.y\" in URI"); error = BSON_ERROR_INIT; ASSERT (!mongoc_uri_new_with_error ("mongodb+srv://a.b.c,d.e.f", &error)); @@ -1597,10 +1606,7 @@ test_mongoc_host_list_from_string (void) capture_logs (true); ASSERT (!_mongoc_host_list_from_string (&host_list, "[::1]extra_chars:27017")); - ASSERT_CAPTURED_LOG ("_mongoc_host_list_from_string", - MONGOC_LOG_LEVEL_ERROR, - "If present, port should immediately follow the \"]\"" - "in an IPv6 address"); + ASSERT_CAPTURED_LOG ("_mongoc_host_list_from_string", MONGOC_LOG_LEVEL_ERROR, "Invalid trailing content"); /* normal parsing, host and port are split, host is downcased */ ASSERT (_mongoc_host_list_from_string (&host_list, "localHOST:27019")); @@ -1707,7 +1713,7 @@ test_mongoc_uri_compressors (void) capture_logs (true); mongoc_uri_set_compressors (uri, ""); ASSERT_EQUAL_BSON (tmp_bson ("{}"), mongoc_uri_get_compressors (uri)); - ASSERT_CAPTURED_LOG ("mongoc_uri_set_compressors", MONGOC_LOG_LEVEL_WARNING, "Unsupported compressor: ''"); + ASSERT_NO_CAPTURED_LOGS ("Disable compression with empty string"); /* Disable compression */ @@ -2232,11 
+2238,11 @@ test_mongoc_uri_long_hostname (void) capture_logs (true); ASSERT (!mongoc_uri_new (uri_str)); - ASSERT_CAPTURED_LOG ("mongoc_uri_new", MONGOC_LOG_LEVEL_ERROR, "too long"); + ASSERT_CAPTURED_LOG ("mongoc_uri_new", MONGOC_LOG_LEVEL_WARNING, "too long"); clear_captured_logs (); ASSERT (!mongoc_uri_new_for_host_port (host, 12345)); - ASSERT_CAPTURED_LOG ("mongoc_uri_new", MONGOC_LOG_LEVEL_ERROR, "too long"); + ASSERT_CAPTURED_LOG ("mongoc_uri_new", MONGOC_LOG_LEVEL_WARNING, "too long"); bson_free (uri_str); bson_free (host_and_port); @@ -2697,7 +2703,7 @@ test_mongoc_uri_srv (void) const char *option = _key "=" #_value; \ char *lkey = bson_strdup (_key); \ mongoc_lowercase (lkey, lkey); \ - mongoc_uri_parse_options (uri, option, true /* from dns */, &error); \ + _mongoc_uri_apply_query_string (uri, mstr_cstring (option), true /* from dns */, &error); \ ASSERT_ERROR_CONTAINS ( \ error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "prohibited in TXT record"); \ ASSERT (!bson_has_field (mongoc_uri_get_##_where (uri), lkey)); \ @@ -2714,7 +2720,7 @@ test_mongoc_uri_dns_options (void) uri = mongoc_uri_new ("mongodb+srv://a.b.c"); ASSERT (uri); - ASSERT (!mongoc_uri_parse_options (uri, "tls=false", true /* from dsn */, &error)); + ASSERT (!_mongoc_uri_apply_query_string (uri, mstr_cstring ("tls=false"), true /* from dsn */, &error)); ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "prohibited in TXT record"); @@ -2726,8 +2732,8 @@ test_mongoc_uri_dns_options (void) PROHIBITED (MONGOC_URI_GSSAPISERVICENAME, malicious, utf8, credentials); /* the two options allowed in TXT records, case-insensitive */ - ASSERT (mongoc_uri_parse_options (uri, "authsource=db", true, NULL)); - ASSERT (mongoc_uri_parse_options (uri, "RepLIcaSET=rs", true, NULL)); + ASSERT (_mongoc_uri_apply_query_string (uri, mstr_cstring ("authsource=db"), true, NULL)); + ASSERT (_mongoc_uri_apply_query_string (uri, mstr_cstring ("RepLIcaSET=rs"), true, 
NULL)); /* test that URI string overrides TXT record options */ mongoc_uri_destroy (uri); @@ -2736,7 +2742,7 @@ test_mongoc_uri_dns_options (void) // test that parsing warns if replicaSet is ignored from TXT records. { capture_logs (true); - ASSERT (mongoc_uri_parse_options (uri, "replicaSet=db2", true, NULL)); + ASSERT (_mongoc_uri_apply_query_string (uri, mstr_cstring ("replicaSet=db2"), true, NULL)); ASSERT_CAPTURED_LOG ( "parsing replicaSet from TXT", MONGOC_LOG_LEVEL_WARNING, "Ignoring URI option \"replicaSet\""); capture_logs (false); @@ -2746,7 +2752,7 @@ test_mongoc_uri_dns_options (void) // test that parsing does not warn if authSource is ignored from TXT records. { capture_logs (true); - ASSERT (mongoc_uri_parse_options (uri, "authSource=db2", true, NULL)); + ASSERT (_mongoc_uri_apply_query_string (uri, mstr_cstring ("authSource=db2"), true, NULL)); ASSERT_NO_CAPTURED_LOGS ("parsing authSource from TXT"); capture_logs (false); ASSERT_MATCH (mongoc_uri_get_credentials (uri), "{'authsource': 'db1'}"); @@ -3122,7 +3128,7 @@ test_casing_options (void) uri = mongoc_uri_new ("mongodb://localhost:27017/"); mongoc_uri_set_option_as_bool (uri, "TLS", true); - mongoc_uri_parse_options (uri, "ssl=false", false, &error); + _mongoc_uri_apply_query_string (uri, mstr_cstring ("ssl=false"), false, &error); ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "conflicts"); mongoc_uri_destroy (uri); @@ -3176,14 +3182,12 @@ test_parses_long_ipv6 (void) char *uri_string = bson_strdup_printf ("mongodb://%s", host_and_port); capture_logs (true); mongoc_uri_t *uri = mongoc_uri_new_with_error (uri_string, &error); - // Expect error parsing IPv6 literal is logged. - ASSERT_CAPTURED_LOG ("parsing IPv6", MONGOC_LOG_LEVEL_ERROR, "IPv6 literal provided in URI is too long"); + ASSERT_NO_CAPTURED_LOGS ("Invalid IPv6 address"); capture_logs (false); - // Expect a generic parsing error is also returned. 
ASSERT (!uri); ASSERT_ERROR_CONTAINS ( - error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid host string in URI"); + error, MONGOC_ERROR_COMMAND, MONGOC_ERROR_COMMAND_INVALID_ARG, "Invalid host specifier \"["); mongoc_uri_destroy (uri); bson_free (uri_string);