ada-url · bbayles · Jul 17, 2025 · Jul 17, 2025 · Jul 17, 2025 · Jul 17, 2025
diff --git a/README.rst b/README.rst
@@ -1,9 +1,6 @@
 ada-url
 ========
 
-The `urlib.parse` module in Python does not follow the legacy RFC 3978 standard nor
-does it follow the newer WHATWG URL specification. It is also relatively slow.
-
 This is ``ada_url``, a fast standard-compliant Python library for working with URLs based on the ``Ada`` URL
 parser.
 
@@ -27,7 +24,7 @@ Parsing URLs
 ^^^^^^^^^^^^
 
 The ``URL`` class is intended to match the one described in the
-`WHATWG URL spec <https://url.spec.whatwg.org/#url-class>`_:.
+`WHATWG URL spec <https://url.spec.whatwg.org/#url-class>`_.
 
 .. code-block:: python
 
@@ -127,7 +124,8 @@ that it properly encodes IDNs and resolves paths:
     >>> parsed_url.pathname
     '/path2/'
 
-Contrast that with the Python standard library's ``urlib.parse`` module:
+Contrast that with the Python standard library's ``urllib.parse`` module, which loosely
+follows the older `RFC 3986 <https://datatracker.ietf.org/doc/html/rfc3986>`__ standard:
 
 .. code-block:: python
 
@@ -138,11 +136,13 @@ Contrast that with the Python standard library's ``urlib.parse`` module:
     >>> parsed_url.path
     '/./path/../path2/'
 
-Alternative Python bindings
----------------------------
+Performance
+-----------
 
 This package uses `CFFI <https://github.com/ada-url/ada-python/>`__ to call
-the ``Ada`` library's functions, which has a performance cost.
-The alternative `can_ada <https://github.com/tktech/can_ada>`__ (Canadian Ada)
-package uses `pybind11 <https://pybind11.readthedocs.io/en/stable/>`__ to generate a
-Python extension module, which is more performant.
+the ``Ada`` C library's functions, which makes it faster than the Python standard
+library's ``urllib.parse`` module for most applications.
+
+An alternative package, `can_ada <https://github.com/tktech/can_ada>`__, uses
+`pybind11 <https://pybind11.readthedocs.io/en/stable/>`__ to interact with the ``Ada``
+C++ library functions, which is even faster.
diff --git a/ada_url/ada.cpp b/ada_url/ada.cpp
@@ -1,4 +1,4 @@
-/* auto-generated on 2025-06-30 19:51:09 -0400. Do not edit! */
+/* auto-generated on 2025-07-16 22:15:14 -0400. Do not edit! */
 /* begin file src/ada.cpp */
 #include "ada.h"
 /* begin file src/checkers.cpp */
@@ -67,7 +67,8 @@ static constexpr std::array<uint8_t, 256> path_signature_table =
       std::array<uint8_t, 256> result{};
       for (size_t i = 0; i < 256; i++) {
         if (i <= 0x20 || i == 0x22 || i == 0x23 || i == 0x3c || i == 0x3e ||
-            i == 0x3f || i == 0x60 || i == 0x7b || i == 0x7d || i > 0x7e) {
+            i == 0x3f || i == 0x5e || i == 0x60 || i == 0x7b || i == 0x7d ||
+            i > 0x7e) {
           result[i] = 1;
         } else if (i == 0x25) {
           result[i] = 8;
@@ -10444,6 +10445,8 @@ ADA_POP_DISABLE_WARNINGS
 #include <arm_neon.h>
 #elif ADA_SSE2
 #include <emmintrin.h>
+#elif ADA_LSX
+#include <lsxintrin.h>
 #endif
 
 #include <ranges>
@@ -10552,6 +10555,38 @@ ada_really_inline bool has_tabs_or_newline(
   }
   return _mm_movemask_epi8(running) != 0;
 }
+#elif ADA_LSX
+ada_really_inline bool has_tabs_or_newline(
+    std::string_view user_input) noexcept {
+  // LSX build of this check; short inputs defer to is_tabs_or_newline.
+  if (user_input.size() < 16) {  // less than one 16-byte block: scalar scan
+    return std::ranges::any_of(user_input, is_tabs_or_newline);
+  }
+  // Vector path: compare 16 bytes at a time against '\r', '\n' and '\t'.
+  size_t i = 0;
+  const __m128i mask1 = __lsx_vrepli_b('\r');
+  const __m128i mask2 = __lsx_vrepli_b('\n');
+  const __m128i mask3 = __lsx_vrepli_b('\t');
+  // `running` ORs together the comparison results of every block examined.
+  __m128i running{0};
+  for (; i + 15 < user_input.size(); i += 16) {
+    __m128i word = __lsx_vld((const __m128i*)(user_input.data() + i), 0);
+    running = __lsx_vor_v(
+        __lsx_vor_v(running, __lsx_vor_v(__lsx_vseq_b(word, mask1),
+                                         __lsx_vseq_b(word, mask2))),
+        __lsx_vseq_b(word, mask3));
+  }
+  if (i < user_input.size()) {  // tail: last 16 bytes, overlap is harmless
+    __m128i word = __lsx_vld(
+        (const __m128i*)(user_input.data() + user_input.length() - 16), 0);
+    running = __lsx_vor_v(
+        __lsx_vor_v(running, __lsx_vor_v(__lsx_vseq_b(word, mask1),
+                                         __lsx_vseq_b(word, mask2))),
+        __lsx_vseq_b(word, mask3));
+  }
+  if (__lsx_bz_v(running)) return false;  // presumably: nothing matched
+  return true;
+}
 #else
 ada_really_inline bool has_tabs_or_newline(
     std::string_view user_input) noexcept {
@@ -11385,6 +11420,58 @@ ada_really_inline size_t find_next_host_delimiter_special(
   }
   return size_t(view.length());
 }
+#elif ADA_LSX
+ada_really_inline size_t find_next_host_delimiter_special(
+    std::string_view view, size_t location) noexcept {
+  // LSX build: index of the first ':', '/', '\\', '?' or '[' at/after location.
+  if (view.size() - location < 16) {  // under one 16-byte block: scalar scan
+    for (size_t i = location; i < view.size(); i++) {
+      if (view[i] == ':' || view[i] == '/' || view[i] == '\\' ||
+          view[i] == '?' || view[i] == '[') {
+        return i;
+      }
+    }
+    return size_t(view.size());
+  }
+  // Vector path: 16 bytes per step, one equality compare per delimiter byte.
+  size_t i = location;
+  const __m128i mask1 = __lsx_vrepli_b(':');
+  const __m128i mask2 = __lsx_vrepli_b('/');
+  const __m128i mask3 = __lsx_vrepli_b('\\');
+  const __m128i mask4 = __lsx_vrepli_b('?');
+  const __m128i mask5 = __lsx_vrepli_b('[');
+
+  for (; i + 15 < view.size(); i += 16) {
+    __m128i word = __lsx_vld((const __m128i*)(view.data() + i), 0);
+    __m128i m1 = __lsx_vseq_b(word, mask1);
+    __m128i m2 = __lsx_vseq_b(word, mask2);
+    __m128i m3 = __lsx_vseq_b(word, mask3);
+    __m128i m4 = __lsx_vseq_b(word, mask4);
+    __m128i m5 = __lsx_vseq_b(word, mask5);
+    __m128i m =
+        __lsx_vor_v(__lsx_vor_v(__lsx_vor_v(m1, m2), __lsx_vor_v(m3, m4)), m5);
+    int mask = __lsx_vpickve2gr_hu(__lsx_vmsknz_b(m), 0);
+    if (mask != 0) {  // a delimiter was found somewhere in this block
+      return i + trailing_zeroes(mask);  // presumably the first matching byte
+    }
+  }
+  if (i < view.size()) {  // tail: reload the last 16 bytes of the view
+    __m128i word =
+        __lsx_vld((const __m128i*)(view.data() + view.length() - 16), 0);
+    __m128i m1 = __lsx_vseq_b(word, mask1);
+    __m128i m2 = __lsx_vseq_b(word, mask2);
+    __m128i m3 = __lsx_vseq_b(word, mask3);
+    __m128i m4 = __lsx_vseq_b(word, mask4);
+    __m128i m5 = __lsx_vseq_b(word, mask5);
+    __m128i m =
+        __lsx_vor_v(__lsx_vor_v(__lsx_vor_v(m1, m2), __lsx_vor_v(m3, m4)), m5);
+    int mask = __lsx_vpickve2gr_hu(__lsx_vmsknz_b(m), 0);
+    if (mask != 0) {  // overlapped bytes were already scanned without a hit
+      return view.length() - 16 + trailing_zeroes(mask);
+    }
+  }
+  return size_t(view.length());
+}
 #else
 // : / [ \\ ?
 static constexpr std::array<uint8_t, 256> special_host_delimiters =
@@ -11518,6 +11605,53 @@ ada_really_inline size_t find_next_host_delimiter(std::string_view view,
   }
   return size_t(view.length());
 }
+#elif ADA_LSX
+ada_really_inline size_t find_next_host_delimiter(std::string_view view,
+                                                  size_t location) noexcept {
+  // LSX build: index of the first ':', '/', '?' or '[' at/after location.
+  if (view.size() - location < 16) {  // under one 16-byte block: scalar scan
+    for (size_t i = location; i < view.size(); i++) {
+      if (view[i] == ':' || view[i] == '/' || view[i] == '?' ||
+          view[i] == '[') {
+        return i;
+      }
+    }
+    return size_t(view.size());
+  }
+  // Vector path: 16 bytes per step, one equality compare per delimiter byte.
+  size_t i = location;
+  const __m128i mask1 = __lsx_vrepli_b(':');
+  const __m128i mask2 = __lsx_vrepli_b('/');
+  const __m128i mask4 = __lsx_vrepli_b('?');
+  const __m128i mask5 = __lsx_vrepli_b('[');
+
+  for (; i + 15 < view.size(); i += 16) {
+    __m128i word = __lsx_vld((const __m128i*)(view.data() + i), 0);
+    __m128i m1 = __lsx_vseq_b(word, mask1);
+    __m128i m2 = __lsx_vseq_b(word, mask2);
+    __m128i m4 = __lsx_vseq_b(word, mask4);
+    __m128i m5 = __lsx_vseq_b(word, mask5);
+    __m128i m = __lsx_vor_v(__lsx_vor_v(m1, m2), __lsx_vor_v(m4, m5));
+    int mask = __lsx_vpickve2gr_hu(__lsx_vmsknz_b(m), 0);
+    if (mask != 0) {  // a delimiter was found somewhere in this block
+      return i + trailing_zeroes(mask);  // presumably the first matching byte
+    }
+  }
+  if (i < view.size()) {  // tail: reload the last 16 bytes of the view
+    __m128i word =
+        __lsx_vld((const __m128i*)(view.data() + view.length() - 16), 0);
+    __m128i m1 = __lsx_vseq_b(word, mask1);
+    __m128i m2 = __lsx_vseq_b(word, mask2);
+    __m128i m4 = __lsx_vseq_b(word, mask4);
+    __m128i m5 = __lsx_vseq_b(word, mask5);
+    __m128i m = __lsx_vor_v(__lsx_vor_v(m1, m2), __lsx_vor_v(m4, m5));
+    int mask = __lsx_vpickve2gr_hu(__lsx_vmsknz_b(m), 0);
+    if (mask != 0) {  // overlapped bytes were already scanned without a hit
+      return view.length() - 16 + trailing_zeroes(mask);
+    }
+  }
+  return size_t(view.length());
+}
 #else
 // : / [ ?
 static constexpr std::array<uint8_t, 256> host_delimiters = []() consteval {
@@ -11762,8 +11896,8 @@ ada_really_inline void parse_prepared_path(std::string_view input,
               ? path_buffer_tmp
               : path_view;
       if (unicode::is_double_dot_path_segment(path_buffer)) {
-        if ((helpers::shorten_path(path, type) || special) &&
-            location == std::string_view::npos) {
+        helpers::shorten_path(path, type);
+        if (location == std::string_view::npos) {
           path += '/';
         }
       } else if (unicode::is_single_dot_path_segment(path_buffer) &&
@@ -15318,8 +15452,8 @@ inline void url_aggregator::consume_prepared_path(std::string_view input) {
               ? path_buffer_tmp
               : path_view;
       if (unicode::is_double_dot_path_segment(path_buffer)) {
-        if ((helpers::shorten_path(path, type) || special) &&
-            location == std::string_view::npos) {
+        helpers::shorten_path(path, type);
+        if (location == std::string_view::npos) {
           path += '/';
         }
       } else if (unicode::is_single_dot_path_segment(path_buffer) &&

diff --git a/ada_url/ada.h b/ada_url/ada.h
@@ -1,4 +1,4 @@
-/* auto-generated on 2025-06-30 19:51:09 -0400. Do not edit! */
+/* auto-generated on 2025-07-16 22:15:14 -0400. Do not edit! */
 /* begin file include/ada.h */
 /**
  * @file ada.h
@@ -431,6 +431,10 @@ namespace ada {
 #define ADA_NEON 1
 #endif
 
+#if defined(__loongarch_sx)
+#define ADA_LSX 1
+#endif
+
 #ifndef __has_cpp_attribute
 #define ada_lifetime_bound
 #elif __has_cpp_attribute(msvc::lifetimebound)
@@ -4204,6 +4208,7 @@ enum class errors : uint8_t { type_error };
 #include <string_view>
 #include <string>
 #include <optional>
+#include <iostream>
 
 #if ADA_TESTING
 #include <iostream>
@@ -4233,6 +4238,17 @@ struct url_pattern_init {
     pattern,
   };
 
+  friend std::ostream& operator<<(std::ostream& os, process_type type) {
+    switch (type) {  // writes a lowercase label for the value and returns os
+      case process_type::url:
+        return os << "url";
+      case process_type::pattern:
+        return os << "pattern";
+      default:  // any value not handled by the cases above
+        return os << "unknown";
+    }
+  }
+
   // All strings must be valid UTF-8.
   // @see https://urlpattern.spec.whatwg.org/#process-a-urlpatterninit
   static tl::expected<url_pattern_init, errors> process(
@@ -9410,7 +9426,7 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
 
 #if ADA_INCLUDE_URL_PATTERN
 namespace ada::url_pattern_helpers {
-#ifdef ADA_TESTING
+#if defined(ADA_TESTING) || defined(ADA_LOGGING)
 inline std::string to_string(token_type type) {
   switch (type) {
     case token_type::INVALID_CHAR:
@@ -9437,7 +9453,7 @@ inline std::string to_string(token_type type) {
       ada::unreachable();
   }
 }
-#endif  // ADA_TESTING
+#endif  // defined(ADA_TESTING) || defined(ADA_LOGGING)
 
 template <url_pattern_regex::regex_concept regex_provider>
 constexpr void constructor_string_parser<regex_provider>::rewind() {
@@ -10498,14 +10514,14 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
 #ifndef ADA_ADA_VERSION_H
 #define ADA_ADA_VERSION_H
 
-#define ADA_VERSION "3.2.5"
+#define ADA_VERSION "3.2.6"
 
 namespace ada {
 
 enum {
   ADA_VERSION_MAJOR = 3,
   ADA_VERSION_MINOR = 2,
-  ADA_VERSION_REVISION = 5,
+  ADA_VERSION_REVISION = 6,
 };
 
 }  // namespace ada

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ada-url"
-version = "1.24.0"
+version = "1.25.0"
 authors = [
     {name = "Bo Bayles", email = "[email protected]"},
 ]