From 2c508a11dd8eca32f8dbc83a2bca663054ddea30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Sat, 20 Mar 2021 12:50:05 +0100
Subject: [PATCH 1/8] Remove Python 2.7 and 3.5 support, add Python 3.9
 support

---
 .github/workflows/build.yml   | 6 +++---
 .github/workflows/publish.yml | 4 ++--
 .github/workflows/tests.yml   | 2 +-
 README.rst                    | 2 +-
 docs/index.rst                | 2 +-
 setup.py                      | 4 +---
 6 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 986099db..37b9a64b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -18,13 +18,13 @@ jobs:
         - python-version: 3.7
           env:
             TOXENV: docs
-        - python-version: 3.8
+        - python-version: 3.9
           env:
            TOXENV: flake8
-        - python-version: 3.8
+        - python-version: 3.9
           env:
            TOXENV: pylint
-        - python-version: 3.8
+        - python-version: 3.9
           env:
            TOXENV: security
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 9390e788..26b1c58a 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -12,10 +12,10 @@ jobs:
     steps:
     - uses: actions/checkout@v2
 
-    - name: Set up Python 3.8
+    - name: Set up Python 3.9
       uses: actions/setup-python@v2
       with:
-        python-version: 3.8
+        python-version: 3.9
 
     - name: Check Tag
       id: check-release-tag
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c47af5f8..643c2655 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        python-version: [2.7, 3.5, 3.6, 3.7, 3.8, pypy3]
+        python-version: [3.6, 3.7, 3.8, 3.9, pypy3]
 
     steps:
     - uses: actions/checkout@v2
diff --git a/README.rst b/README.rst
index e9cbd231..d097492b 100644
--- a/README.rst
+++ b/README.rst
@@ -27,7 +27,7 @@ This is a Python library of web-related functions, such as:
 Requirements
 ============
 
-Python 2.7 or Python 3.5+
+Python 3.6+
 
 Install
 =======
diff --git a/docs/index.rst b/docs/index.rst
index fdbda607..bd14188b 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -28,7 +28,7 @@ Modules
 Requirements
 ============
 
-Python 2.7 or Python 3.3+
+Python 3.6+
 
 Install
 =======
diff --git a/setup.py b/setup.py
index ea0ca0a9..75de11d3 100644
--- a/setup.py
+++ b/setup.py
@@ -18,13 +18,11 @@
         'License :: OSI Approved :: BSD License',
         'Operating System :: OS Independent',
         'Programming Language :: Python',
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
         'Programming Language :: Python :: Implementation :: CPython',
         'Programming Language :: Python :: Implementation :: PyPy',
         'Topic :: Internet :: WWW/HTTP',

From c16d7bac3af3148b7018c67ef7922a5da6b3e640 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Sat, 20 Mar 2021 13:14:30 +0100
Subject: [PATCH 2/8] Remove six

---
 setup.py               |   1 -
 stdeb.cfg              |   2 --
 tests/test_encoding.py |  27 ++++---
 tests/test_html.py     |  91 +++++++++++----------
 tests/test_url.py      |  24 ++++--
 w3lib/form.py          |   7 +-
 w3lib/html.py          |  26 +++---
 w3lib/url.py           | 179 ++++++++++++++++++++---------------------
 w3lib/util.py          |  18 ++---
 9 files changed, 193 insertions(+), 182 deletions(-)
 delete mode 100644 stdeb.cfg

diff --git a/setup.py b/setup.py
index 75de11d3..6f24f5eb 100644
--- a/setup.py
+++ b/setup.py
@@ -27,5 +27,4 @@
         'Programming Language :: Python :: Implementation :: PyPy',
         'Topic :: Internet :: WWW/HTTP',
     ],
-    install_requires=['six >= 1.4.1'],
 )
diff --git a/stdeb.cfg b/stdeb.cfg
deleted file mode 100644
index 5a7e8a2d..00000000
--- a/stdeb.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-[w3lib]
-Depends: python-six (>= 1.4.1)
diff --git a/tests/test_encoding.py b/tests/test_encoding.py
index 649c189a..3d3795cc 100644
--- a/tests/test_encoding.py
+++ b/tests/test_encoding.py
@@ -1,7 +1,14 @@
-import unittest, codecs
-import six
-from w3lib.encoding import (html_body_declared_encoding, read_bom, to_unicode,
-    http_content_type_encoding, resolve_encoding, html_to_unicode)
+import codecs
+import unittest
+
+from w3lib.encoding import (
+    html_body_declared_encoding,
+    http_content_type_encoding,
+    html_to_unicode,
+    read_bom,
+    resolve_encoding,
+    to_unicode,
+)
 
 class RequestEncodingTests(unittest.TestCase):
     utf8_fragments = [
@@ -107,18 +114,18 @@ def test_unicode_body(self):
         original_string = unicode_string.encode('cp1251')
         encoding, body_unicode = html_to_unicode(ct('cp1251'), original_string)
         # check body_as_unicode
-        self.assertTrue(isinstance(body_unicode, six.text_type))
+        self.assertTrue(isinstance(body_unicode, str))
         self.assertEqual(body_unicode, unicode_string)
 
     def _assert_encoding(self, content_type, body, expected_encoding,
                          expected_unicode):
-        assert not isinstance(body, six.text_type)
+        assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body)
-        self.assertTrue(isinstance(body_unicode, six.text_type))
+        self.assertTrue(isinstance(body_unicode, str))
         self.assertEqual(norm_encoding(encoding),
                         norm_encoding(expected_encoding))
 
-        if isinstance(expected_unicode, six.string_types):
+        if isinstance(expected_unicode, str):
             self.assertEqual(body_unicode, expected_unicode)
         else:
             self.assertTrue(
@@ -177,9 +184,9 @@ def test_replace_wrong_encoding(self):
 
     def _assert_encoding_detected(self, content_type, expected_encoding, body,
                                   **kwargs):
-        assert not isinstance(body, six.text_type)
+        assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body, **kwargs)
-        self.assertTrue(isinstance(body_unicode, six.text_type))
+        self.assertTrue(isinstance(body_unicode, str))
         self.assertEqual(norm_encoding(encoding), norm_encoding(expected_encoding))
 
     def test_BOM(self):
diff --git a/tests/test_html.py b/tests/test_html.py
index a3c31d87..89a651e4 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -1,18 +1,25 @@
-# -*- coding: utf-8 -*-
 import unittest
-import six
-from w3lib.html import (replace_entities, replace_tags, remove_comments,
-    remove_tags_with_content, replace_escape_chars, remove_tags, unquote_markup,
-    get_base_url, get_meta_refresh)
+
+from w3lib.html import (
+    get_base_url,
+    get_meta_refresh,
+    remove_comments,
+    remove_tags,
+    remove_tags_with_content,
+    replace_entities,
+    replace_escape_chars,
+    replace_tags,
+    unquote_markup,
+)
 
 
 class RemoveEntitiesTest(unittest.TestCase):
     def test_returns_unicode(self):
         # make sure it always return uncode
-        assert isinstance(replace_entities(b'no entities'), six.text_type)
-        assert isinstance(replace_entities(b'Price: &pound;100!'), six.text_type)
-        assert isinstance(replace_entities(u'no entities'), six.text_type)
-        assert isinstance(replace_entities(u'Price: &pound;100!'), six.text_type)
+        assert isinstance(replace_entities(b'no entities'), str)
+        assert isinstance(replace_entities(b'Price: &pound;100!'), str)
+        assert isinstance(replace_entities(u'no entities'), str)
+        assert isinstance(replace_entities(u'Price: &pound;100!'), str)
 
     def test_regular(self):
         # regular conversions
@@ -71,8 +78,8 @@ def test_encoding(self):
 class ReplaceTagsTest(unittest.TestCase):
     def test_returns_unicode(self):
         # make sure it always return uncode
-        assert isinstance(replace_tags(b'no entities'), six.text_type)
-        assert isinstance(replace_tags('no entities'), six.text_type)
+        assert isinstance(replace_tags(b'no entities'), str)
+        assert isinstance(replace_tags('no entities'), str)
 
     def test_replace_tags(self):
         self.assertEqual(replace_tags(u'This text contains <a>some tag</a>'),
@@ -88,10 +95,10 @@ def test_replace_tags_multiline(self):
 class RemoveCommentsTest(unittest.TestCase):
     def test_returns_unicode(self):
         # make sure it always return unicode
-        assert isinstance(remove_comments(b'without comments'), six.text_type)
-        assert isinstance(remove_comments(b'<!-- with comments -->'), six.text_type)
-        assert isinstance(remove_comments(u'without comments'), six.text_type)
-        assert isinstance(remove_comments(u'<!-- with comments -->'), six.text_type)
+        assert isinstance(remove_comments(b'without comments'), str)
+        assert isinstance(remove_comments(b'<!-- with comments -->'), str)
+        assert isinstance(remove_comments(u'without comments'), str)
+        assert isinstance(remove_comments(u'<!-- with comments -->'), str)
 
     def test_no_comments(self):
         # text without comments
@@ -112,16 +119,16 @@ def test_remove_comments(self):
 
 class RemoveTagsTest(unittest.TestCase):
     def test_returns_unicode(self):
         # make sure it always return unicode
-        assert isinstance(remove_tags(b'no tags'), six.text_type)
-        assert isinstance(remove_tags(b'no tags', which_ones=('p',)), six.text_type)
-        assert isinstance(remove_tags(b'<p>one tag</p>'), six.text_type)
-        assert isinstance(remove_tags(b'<p>one tag</p>', which_ones=('p')), six.text_type)
-        assert isinstance(remove_tags(b'<a href="">link</a>', which_ones=('b',)), six.text_type)
-        assert isinstance(remove_tags(u'no tags'), six.text_type)
-        assert isinstance(remove_tags(u'no tags', which_ones=('p',)), six.text_type)
-        assert isinstance(remove_tags(u'<p>one tag</p>'), six.text_type)
-        assert isinstance(remove_tags(u'<p>one tag</p>', which_ones=('p')), six.text_type)
-        assert isinstance(remove_tags(u'<a href="">link</a>', which_ones=('b',)), six.text_type)
+        assert isinstance(remove_tags(b'no tags'), str)
+        assert isinstance(remove_tags(b'no tags', which_ones=('p',)), str)
+        assert isinstance(remove_tags(b'<p>one tag</p>'), str)
+        assert isinstance(remove_tags(b'<p>one tag</p>', which_ones=('p')), str)
+        assert isinstance(remove_tags(b'<a href="">link</a>', which_ones=('b',)), str)
+        assert isinstance(remove_tags(u'no tags'), str)
+        assert isinstance(remove_tags(u'no tags', which_ones=('p',)), str)
+        assert isinstance(remove_tags(u'<p>one tag</p>'), str)
+        assert isinstance(remove_tags(u'<p>one tag</p>', which_ones=('p')), str)
+        assert isinstance(remove_tags(u'<a href="">link</a>', which_ones=('b',)), str)
 
     def test_remove_tags_without_tags(self):
         # text without tags
@@ -160,14 +167,14 @@ def test_uppercase_tags(self):
 
 class RemoveTagsWithContentTest(unittest.TestCase):
     def test_returns_unicode(self):
         # make sure it always return unicode
-        assert isinstance(remove_tags_with_content(b'no tags'), six.text_type)
-        assert isinstance(remove_tags_with_content(b'no tags', which_ones=('p',)), six.text_type)
-        assert isinstance(remove_tags_with_content(b'<p>one tag</p>', which_ones=('p',)), six.text_type)
-        assert isinstance(remove_tags_with_content(b'<a href="">link</a>', which_ones=('b',)), six.text_type)
-        assert isinstance(remove_tags_with_content(u'no tags'), six.text_type)
-        assert isinstance(remove_tags_with_content(u'no tags', which_ones=('p',)), six.text_type)
-        assert isinstance(remove_tags_with_content(u'<p>one tag</p>', which_ones=('p',)), six.text_type)
-        assert isinstance(remove_tags_with_content(u'<a href="">link</a>', which_ones=('b',)), six.text_type)
+        assert isinstance(remove_tags_with_content(b'no tags'), str)
+        assert isinstance(remove_tags_with_content(b'no tags', which_ones=('p',)), str)
+        assert isinstance(remove_tags_with_content(b'<p>one tag</p>', which_ones=('p',)), str)
+        assert isinstance(remove_tags_with_content(b'<a href="">link</a>', which_ones=('b',)), str)
+        assert isinstance(remove_tags_with_content(u'no tags'), str)
+        assert isinstance(remove_tags_with_content(u'no tags', which_ones=('p',)), str)
+        assert isinstance(remove_tags_with_content(u'<p>one tag</p>', which_ones=('p',)), str)
+        assert isinstance(remove_tags_with_content(u'<a href="">link</a>', which_ones=('b',)), str)
 
     def test_without_tags(self):
         # text without tags
@@ -194,13 +201,13 @@ def test_tags_with_shared_prefix(self):
 
 class ReplaceEscapeCharsTest(unittest.TestCase):
     def test_returns_unicode(self):
         # make sure it always return unicode
-        assert isinstance(replace_escape_chars(b'no ec'), six.text_type)
-        assert isinstance(replace_escape_chars(b'no ec', replace_by='str'), six.text_type)
-        assert isinstance(replace_escape_chars(b'no ec', replace_by=u'str'), six.text_type)
-        assert isinstance(replace_escape_chars(b'no ec', which_ones=('\n', '\t',)), six.text_type)
-        assert isinstance(replace_escape_chars(u'no ec'), six.text_type)
-        assert isinstance(replace_escape_chars(u'no ec', replace_by=u'str'), six.text_type)
-        assert isinstance(replace_escape_chars(u'no ec', which_ones=('\n', '\t',)), six.text_type)
+        assert isinstance(replace_escape_chars(b'no ec'), str)
+        assert isinstance(replace_escape_chars(b'no ec', replace_by='str'), str)
+        assert isinstance(replace_escape_chars(b'no ec', replace_by=u'str'), str)
+        assert isinstance(replace_escape_chars(b'no ec', which_ones=('\n', '\t',)), str)
+        assert isinstance(replace_escape_chars(u'no ec'), str)
+        assert isinstance(replace_escape_chars(u'no ec', replace_by=u'str'), str)
+        assert isinstance(replace_escape_chars(u'no ec', which_ones=('\n', '\t',)), str)
 
     def test_without_escape_chars(self):
         # text without escape chars
@@ -226,8 +233,8 @@ class UnquoteMarkupTest(unittest.TestCase):
 
     def test_returns_unicode(self):
         # make sure it always return unicode
-        assert isinstance(unquote_markup(self.sample_txt1.encode('latin-1')), six.text_type)
-        assert isinstance(unquote_markup(self.sample_txt2), six.text_type)
+        assert isinstance(unquote_markup(self.sample_txt1.encode('latin-1')), str)
+        assert isinstance(unquote_markup(self.sample_txt2), str)
 
     def test_unquote_markup(self):
         self.assertEqual(unquote_markup(self.sample_txt1), u"""<node1>hi, this is sample text with entities: & \xa9
diff --git a/tests/test_url.py b/tests/test_url.py
index 07695500..9b854232 100644
--- a/tests/test_url.py
+++ b/tests/test_url.py
@@ -1,15 +1,25 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
 import os
 import unittest
+from urllib.parse import urlparse
 
 import pytest
-from six.moves.urllib.parse import urlparse
 
-from w3lib.url import (is_url, safe_url_string, safe_download_url,
-    url_query_parameter, add_or_replace_parameter, url_query_cleaner,
-    file_uri_to_path, parse_data_uri, path_to_file_uri, any_to_uri,
-    urljoin_rfc, canonicalize_url, parse_url, add_or_replace_parameters)
+from w3lib.url import (
+    add_or_replace_parameter,
+    add_or_replace_parameters,
+    any_to_uri,
+    canonicalize_url,
+    file_uri_to_path,
+    is_url,
+    parse_data_uri,
+    parse_url,
+    path_to_file_uri,
+    safe_download_url,
+    safe_url_string,
+    url_query_parameter,
+    url_query_cleaner,
+    urljoin_rfc,
+)
 
 
 class UrlTests(unittest.TestCase):
diff --git a/w3lib/form.py b/w3lib/form.py
index 6a5eb403..9181b057 100644
--- a/w3lib/form.py
+++ b/w3lib/form.py
@@ -1,9 +1,6 @@
 import warnings
-import six
-if six.PY2:
-    from cStringIO import StringIO as BytesIO
-else:
-    from io import BytesIO
+from io import BytesIO
+
 from w3lib.util import unicode_to_str
diff --git a/w3lib/html.py b/w3lib/html.py
index 87d8d214..e87d96a1 100644
--- a/w3lib/html.py
+++ b/w3lib/html.py
@@ -5,16 +5,16 @@
 import warnings
 import re
-import six
-from six import moves
+from html.entities import name2codepoint
+from urllib.parse import urljoin
 
 from w3lib.util import to_bytes, to_unicode
 from w3lib.url import safe_url_string
 
 _ent_re = re.compile(r'&((?P<named>[a-z\d]+)|#(?P<dec>\d+)|#x(?P<hex>[a-f\d]+))(?P<semicolon>;?)', re.IGNORECASE)
 _tag_re = re.compile(r'<[a-zA-Z\/!].*?>', re.DOTALL)
-_baseurl_re = re.compile(six.u(r'<base\s[^>]*href\s*=\s*[\"\']\s*([^\"\'\s]+)\s*[\"\']'), re.I)
-_meta_refresh_re = re.compile(six.u(r'<meta\s[^>]*http-equiv[^>]*refresh[^>]*content\s*=\s*(?P<quote>["\'])(?P<int>(\d*\.)?\d+)\s*;\s*url=\s*(?P<url>.*?)(?P=quote)'), re.DOTALL | re.IGNORECASE)
+_baseurl_re = re.compile(r'<base\s[^>]*href\s*=\s*[\"\']\s*([^\"\'\s]+)\s*[\"\']', re.I)
+_meta_refresh_re = re.compile(r'<meta\s[^>]*http-equiv[^>]*refresh[^>]*content\s*=\s*(?P<quote>["\'])(?P<int>(\d*\.)?\d+)\s*;\s*url=\s*(?P<url>.*?)(?P=quote)', re.DOTALL | re.IGNORECASE)
 _cdata_re = re.compile(r'((?P<cdata_s><!\[CDATA\[)(?P<cdata_d>.*?)(?P<cdata_e>\]\]>))', re.DOTALL)
 
 HTML5_WHITESPACE = ' \t\n\r\x0c'
@@ -77,8 +77,10 @@ def convert_entity(m):
         if entity_name.lower() in keep:
             return m.group(0)
         else:
-            number = (moves.html_entities.name2codepoint.get(entity_name) or
-                      moves.html_entities.name2codepoint.get(entity_name.lower()))
+            number = (
+                name2codepoint.get(entity_name)
+                or name2codepoint.get(entity_name.lower())
+            )
         if number is not None:
             # Numeric character references in the 80-9F range are typically
             # interpreted by browsers as representing the characters mapped
@@ -86,9 +88,9 @@ def convert_entity(m):
             # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
             try:
                 if 0x80 <= number <= 0x9f:
-                    return six.int2byte(number).decode('cp1252')
+                    return bytes((number,)).decode('cp1252')
                 else:
-                    return six.unichr(number)
+                    return chr(number)
             except ValueError:
                 pass
@@ -265,7 +267,7 @@ def _get_fragments(txt, pattern):
 
     text = to_unicode(text, encoding)
     ret_text = u''
     for fragment in _get_fragments(text, _cdata_re):
-        if isinstance(fragment, six.string_types):
+        if isinstance(fragment, str):
             # it's not a CDATA (so we try to remove its entities)
             ret_text += replace_entities(fragment, keep=keep, remove_illegal=remove_illegal)
         else:
@@ -284,7 +286,7 @@ def get_base_url(text, baseurl='', encoding='utf-8'):
     text = to_unicode(text, encoding)
     m = _baseurl_re.search(text)
     if m:
-        return moves.urllib.parse.urljoin(
+        return urljoin(
             safe_url_string(baseurl),
             safe_url_string(m.group(1), encoding=encoding)
         )
@@ -301,8 +303,6 @@ def get_meta_refresh(text, baseurl='', encoding='utf-8', ignore_tags=('script',
 
     """
 
-    if six.PY2:
-        baseurl = to_bytes(baseurl, encoding)
     try:
         text = to_unicode(text, encoding)
     except UnicodeDecodeError:
@@ -314,7 +314,7 @@ def get_meta_refresh(text, baseurl='', encoding='utf-8', ignore_tags=('script',
     if m:
         interval = float(m.group('int'))
         url = safe_url_string(m.group('url').strip(' "\''), encoding)
-        url = moves.urllib.parse.urljoin(baseurl, url)
+        url = urljoin(baseurl, url)
         return interval, url
     else:
         return None, None
diff --git a/w3lib/url.py b/w3lib/url.py
index bf12745d..e98da51d 100644
--- a/w3lib/url.py
+++ b/w3lib/url.py
@@ -5,17 +5,28 @@
 import base64
 import codecs
 import os
-import re
 import posixpath
-import warnings
+import re
 import string
+import warnings
 from collections import namedtuple
-import six
-from six.moves.urllib.parse import (urljoin, urlsplit, urlunsplit,
-    urldefrag, urlencode, urlparse,
-    quote, parse_qs, parse_qsl,
-    ParseResult, unquote, urlunparse)
-from six.moves.urllib.request import pathname2url, url2pathname
+from urllib.parse import (
+    _coerce_args,
+    parse_qs,
+    parse_qsl,
+    ParseResult,
+    quote,
+    unquote,
+    unquote_to_bytes,
+    urldefrag,
+    urlencode,
+    urljoin,
+    urlparse,
+    urlsplit,
+    urlunparse,
+    urlunsplit,
+)
+from urllib.request import pathname2url, url2pathname
 
 from w3lib.util import to_bytes, to_native_str, to_unicode
@@ -185,7 +196,7 @@ def url_query_cleaner(url, parameterlist=(), sep='&', kvsep='=', remove=False, u
 
     """
 
-    if isinstance(parameterlist, (six.text_type, bytes)):
+    if isinstance(parameterlist, (str, bytes)):
         parameterlist = [parameterlist]
     url, fragment = urldefrag(url)
     base, _, query = url.partition('?')
@@ -347,10 +358,7 @@ def parse_data_uri(uri):
     # delimiters, but it makes parsing easier and should not affect
     # well-formed URIs, as the delimiters used in this URI scheme are not
     # allowed, percent-encoded or not, in tokens.
-    if six.PY2:
-        uri = unquote(uri)
-    else:
-        uri = unquote_to_bytes(uri)
+    uri = unquote_to_bytes(uri)
 
     media_type = "text/plain"
     media_type_params = {}
@@ -470,33 +478,32 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
     # 1. decode query-string as UTF-8 (or keep raw bytes),
     #    sort values,
     #    and percent-encode them back
-    if six.PY2:
-        keyvals = parse_qsl(query, keep_blank_values)
-    else:
-        # Python3's urllib.parse.parse_qsl does not work as wanted
-        # for percent-encoded characters that do not match passed encoding,
-        # they get lost.
-        #
-        # e.g., 'q=b%a3' becomes [('q', 'b\ufffd')]
-        # (ie. with 'REPLACEMENT CHARACTER' (U+FFFD),
-        # instead of \xa3 that you get with Python2's parse_qsl)
-        #
-        # what we want here is to keep raw bytes, and percent encode them
-        # so as to preserve whatever encoding what originally used.
-        #
-        # See https://tools.ietf.org/html/rfc3987#section-6.4:
-        #
-        # For example, it is possible to have a URI reference of
-        # "http://www.example.org/r%E9sum%E9.xml#r%C3%A9sum%C3%A9", where the
-        # document name is encoded in iso-8859-1 based on server settings, but
-        # where the fragment identifier is encoded in UTF-8 according to
-        # [XPointer]. The IRI corresponding to the above URI would be (in XML
-        # notation)
-        # "http://www.example.org/r%E9sum%E9.xml#r&#xE9;sum&#xE9;".
-        # Similar considerations apply to query parts. The functionality of
-        # IRIs (namely, to be able to include non-ASCII characters) can only be
-        # used if the query part is encoded in UTF-8.
-        keyvals = parse_qsl_to_bytes(query, keep_blank_values)
+
+    # Python's urllib.parse.parse_qsl does not work as wanted
+    # for percent-encoded characters that do not match passed encoding,
+    # they get lost.
+    #
+    # e.g., 'q=b%a3' becomes [('q', 'b\ufffd')]
+    # (ie. with 'REPLACEMENT CHARACTER' (U+FFFD),
+    # instead of \xa3 that you get with Python2's parse_qsl)
+    #
+    # what we want here is to keep raw bytes, and percent encode them
+    # so as to preserve whatever encoding what originally used.
+    #
+    # See https://tools.ietf.org/html/rfc3987#section-6.4:
+    #
+    # For example, it is possible to have a URI reference of
+    # "http://www.example.org/r%E9sum%E9.xml#r%C3%A9sum%C3%A9", where the
+    # document name is encoded in iso-8859-1 based on server settings, but
+    # where the fragment identifier is encoded in UTF-8 according to
+    # [XPointer]. The IRI corresponding to the above URI would be (in XML
+    # notation)
+    # "http://www.example.org/r%E9sum%E9.xml#r&#xE9;sum&#xE9;".
+    # Similar considerations apply to query parts. The functionality of
+    # IRIs (namely, to be able to include non-ASCII characters) can only be
+    # used if the query part is encoded in UTF-8.
+    keyvals = parse_qsl_to_bytes(query, keep_blank_values)
+
     keyvals.sort()
     query = urlencode(keyvals)
 
@@ -520,17 +527,12 @@ def _unquotepath(path):
     for reserved in ('2f', '2F', '3f', '3F'):
         path = path.replace('%' + reserved, '%25' + reserved.upper())
 
-    if six.PY2:
-        # in Python 2, '%a3' becomes '\xa3', which is what we want
-        return unquote(path)
-    else:
-        # in Python 3,
-        # standard lib's unquote() does not work for non-UTF-8
-        # percent-escaped characters, they get lost.
-        # e.g., '%a3' becomes 'REPLACEMENT CHARACTER' (U+FFFD)
-        #
-        # unquote_to_bytes() returns raw bytes instead
-        return unquote_to_bytes(path)
+    # standard lib's unquote() does not work for non-UTF-8
+    # percent-escaped characters, they get lost.
+    # e.g., '%a3' becomes 'REPLACEMENT CHARACTER' (U+FFFD)
+    #
+    # unquote_to_bytes() returns raw bytes instead
+    return unquote_to_bytes(path)
 
 
 def parse_url(url, encoding=None):
@@ -542,51 +544,48 @@ def parse_url(url, encoding=None):
     return urlparse(to_unicode(url, encoding))
 
 
-if not six.PY2:
-    from urllib.parse import _coerce_args, unquote_to_bytes
-
-    def parse_qsl_to_bytes(qs, keep_blank_values=False):
-        """Parse a query given as a string argument.
-
-        Data are returned as a list of name, value pairs as bytes.
-
-        Arguments:
-
-        qs: percent-encoded query string to be parsed
-
-        keep_blank_values: flag indicating whether blank values in
-            percent-encoded queries should be treated as blank strings. A
-            true value indicates that blanks should be retained as blank
-            strings. The default false value indicates that blank values
-            are to be ignored and treated as if they were not included.
-
-        """
-        # This code is the same as Python3's parse_qsl()
-        # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
-        # except for the unquote(s, encoding, errors) calls replaced
-        # with unquote_to_bytes(s)
-        qs, _coerce_result = _coerce_args(qs)
-        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
-        r = []
-        for name_value in pairs:
-            if not name_value:
-                continue
-            nv = name_value.split('=', 1)
-            if len(nv) != 2:
-                # Handle case of a control-name with no equal sign
-                if keep_blank_values:
-                    nv.append('')
-                else:
-                    continue
-            if len(nv[1]) or keep_blank_values:
-                name = nv[0].replace('+', ' ')
-                name = unquote_to_bytes(name)
-                name = _coerce_result(name)
-                value = nv[1].replace('+', ' ')
-                value = unquote_to_bytes(value)
-                value = _coerce_result(value)
-                r.append((name, value))
-        return r
+def parse_qsl_to_bytes(qs, keep_blank_values=False):
+    """Parse a query given as a string argument.
+
+    Data are returned as a list of name, value pairs as bytes.
+
+    Arguments:
+
+    qs: percent-encoded query string to be parsed
+
+    keep_blank_values: flag indicating whether blank values in
+        percent-encoded queries should be treated as blank strings. A
+        true value indicates that blanks should be retained as blank
+        strings. The default false value indicates that blank values
+        are to be ignored and treated as if they were not included.
+
+    """
+    # This code is the same as Python3's parse_qsl()
+    # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
+    # except for the unquote(s, encoding, errors) calls replaced
+    # with unquote_to_bytes(s)
+    qs, _coerce_result = _coerce_args(qs)
+    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+    r = []
+    for name_value in pairs:
+        if not name_value:
+            continue
+        nv = name_value.split('=', 1)
+        if len(nv) != 2:
+            # Handle case of a control-name with no equal sign
+            if keep_blank_values:
+                nv.append('')
+            else:
+                continue
+        if len(nv[1]) or keep_blank_values:
+            name = nv[0].replace('+', ' ')
+            name = unquote_to_bytes(name)
+            name = _coerce_result(name)
+            value = nv[1].replace('+', ' ')
+            value = unquote_to_bytes(value)
+            value = _coerce_result(value)
+            r.append((name, value))
+    return r
 
 
 def urljoin_rfc(base, ref, encoding='utf-8'):
diff --git a/w3lib/util.py b/w3lib/util.py
index d8513eef..02deeeea 100644
--- a/w3lib/util.py
+++ b/w3lib/util.py
@@ -1,5 +1,3 @@
-import six
-
 def str_to_unicode(text, encoding=None, errors='strict'):
     if encoding is None:
         encoding = 'utf-8'
@@ -10,16 +8,16 @@ def str_to_unicode(text, encoding=None, errors='strict'):
 def unicode_to_str(text, encoding=None, errors='strict'):
     if encoding is None:
         encoding = 'utf-8'
-    if isinstance(text, six.text_type):
+    if isinstance(text, str):
         return text.encode(encoding, errors)
     return text
 
 def to_unicode(text, encoding=None, errors='strict'):
     """Return the unicode representation of a bytes object `text`. If
     `text` is already an unicode object, return it as-is."""
-    if isinstance(text, six.text_type):
+    if isinstance(text, str):
         return text
-    if not isinstance(text, (bytes, six.text_type)):
+    if not isinstance(text, (bytes, str)):
         raise TypeError('to_unicode must receive a bytes, str or unicode '
                         'object, got %s' % type(text).__name__)
     if encoding is None:
@@ -31,7 +29,7 @@ def to_bytes(text, encoding=None, errors='strict'):
     is already a bytes object, return it as-is."""
     if isinstance(text, bytes):
         return text
-    if not isinstance(text, six.string_types):
+    if not isinstance(text, str):
         raise TypeError('to_bytes must receive a unicode, str or bytes '
                         'object, got %s' % type(text).__name__)
     if encoding is None:
@@ -39,9 +37,5 @@ def to_bytes(text, encoding=None, errors='strict'):
     return text.encode(encoding, errors)
 
 def to_native_str(text, encoding=None, errors='strict'):
-    """ Return str representation of `text`
-    (bytes in Python 2.x and unicode in Python 3.x). """
-    if six.PY2:
-        return to_bytes(text, encoding, errors)
-    else:
-        return to_unicode(text, encoding, errors)
+    """ Return str representation of `text` """
+    return to_unicode(text, encoding, errors)

From fd1f8d0d2ac45d6cabf8e56257a64b771e8f543c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Sat, 20 Mar 2021 13:20:00 +0100
Subject: [PATCH 3/8] Fix style issues

---
 pytest.ini    | 2 ++
 w3lib/html.py | 2 +-
 w3lib/url.py  | 1 -
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pytest.ini b/pytest.ini
index 4f23e3f8..30701606 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,6 +1,8 @@
 [pytest]
 doctest_optionflags = ALLOW_UNICODE ALLOW_BYTES
 flake8-ignore =
+    W503
+
     docs/conf.py E121 E122 E265 E401 E501
     tests/test_encoding.py E128 E221 E241 E302 E401 E501 E731
     tests/test_form.py E265 E501
diff --git a/w3lib/html.py b/w3lib/html.py
index e87d96a1..cbb1a9b0 100644
--- a/w3lib/html.py
+++ b/w3lib/html.py
@@ -8,7 +8,7 @@
 from html.entities import name2codepoint
 from urllib.parse import urljoin
 
-from w3lib.util import to_bytes, to_unicode
+from w3lib.util import to_unicode
 from w3lib.url import safe_url_string
 
 _ent_re = re.compile(r'&((?P<named>[a-z\d]+)|#(?P<dec>\d+)|#x(?P<hex>[a-f\d]+))(?P<semicolon>;?)', re.IGNORECASE)
diff --git a/w3lib/url.py b/w3lib/url.py
index e98da51d..d27dbd52 100644
--- a/w3lib/url.py
+++ b/w3lib/url.py
@@ -16,7 +16,6 @@
     parse_qsl,
     ParseResult,
     quote,
-    unquote,
     unquote_to_bytes,
     urldefrag,
     urlencode,

From ea2a4ceeaaf3cc0c08408127c4099f8977b72da8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 24 Mar 2021 11:30:21 +0100
Subject: [PATCH 4/8] Remove unused tests/py3-ignores.txt

---
 tests/py3-ignores.txt | 5 -----
 1 file changed, 5 deletions(-)
 delete mode 100644 tests/py3-ignores.txt

diff --git a/tests/py3-ignores.txt b/tests/py3-ignores.txt
deleted file mode 100644
index 09f34ec9..00000000
--- a/tests/py3-ignores.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-w3lib/encoding.py
-w3lib/form.py
-w3lib/html.py
-w3lib/http.py
-w3lib/url.py

From 9922b177c7e701f72be058b0786ae43b63c3e687 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 24 Mar 2021 11:30:55 +0100
Subject: [PATCH 5/8] =?UTF-8?q?Provide=20a=20URL=20that=20indicates=20the?=
 =?UTF-8?q?=20reasoning=20behind=20ignoring=20Flake8=E2=80=99s=20W503?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pytest.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytest.ini b/pytest.ini
index 30701606..94b29688 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,7 +1,7 @@
 [pytest]
 doctest_optionflags = ALLOW_UNICODE ALLOW_BYTES
 flake8-ignore =
-    W503
+    W503  # https://www.flake8rules.com/rules/W503.html
     docs/conf.py E121 E122 E265 E401 E501
     tests/test_encoding.py E128 E221 E241 E302 E401 E501 E731
     tests/test_form.py E265 E501

From 0e049871d0a20c2a20964f3ae17a97e47162ade8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 24 Mar 2021 11:42:14 +0100
Subject: [PATCH 6/8] Remove the u prefix from literal strings

---
 docs/conf.py           |  16 ++--
 tests/test_encoding.py |  84 ++++++++---------
 tests/test_form.py     |  20 ++---
 tests/test_html.py     | 200 ++++++++++++++++++++---------------------
 tests/test_http.py     |   4 +-
 tests/test_url.py      | 156 ++++++++++++++++----------------
 w3lib/encoding.py      |   4 +-
 w3lib/form.py          |   4 +-
 w3lib/html.py          |  34 +++----
 w3lib/url.py           |   6 +-
 10 files changed, 264 insertions(+), 264 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index b786f2b9..d79efcf4 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -47,8 +47,8 @@
 master_doc = 'index'
 
 # General information about the project.
-project = u'w3lib'
-copyright = u'2014, w3lib developers'
+project = 'w3lib'
+copyright = '2014, w3lib developers'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -190,8 +190,8 @@
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-  ('index', 'w3lib.tex', u'w3lib Documentation',
-   u'w3lib developers', 'manual'),
+  ('index', 'w3lib.tex', 'w3lib Documentation',
+   'w3lib developers', 'manual'),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -220,8 +220,8 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('index', 'w3lib', u'w3lib Documentation',
-     [u'w3lib developers'], 1)
+    ('index', 'w3lib', 'w3lib Documentation',
+     ['w3lib developers'], 1)
 ]
 
 # If true, show URL addresses after external links.
@@ -234,8 +234,8 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-  ('index', 'w3lib', u'w3lib Documentation',
-   u'w3lib developers', 'w3lib', 'One line description of project.',
+  ('index', 'w3lib', 'w3lib Documentation',
+   'w3lib developers', 'w3lib', 'One line description of project.',
    'Miscellaneous'),
 ]

diff --git a/tests/test_encoding.py b/tests/test_encoding.py
index 3d3795cc..9faced10 100644
--- a/tests/test_encoding.py
+++ b/tests/test_encoding.py
@@ -29,7 +29,7 @@ class RequestEncodingTests(unittest.TestCase):
 
     def test_bom(self):
         # cjk water character in unicode
-        water_unicode = u'\u6C34'
+        water_unicode = '\u6C34'
         # BOM + water character encoded
         utf16be = b'\xfe\xff\x6c\x34'
         utf16le = b'\xff\xfe\x34\x6c'
@@ -69,19 +69,19 @@ def test_html_body_declared_encoding(self):
 
     def test_html_body_declared_encoding_unicode(self):
         # html_body_declared_encoding should work when unicode body is passed
-        self.assertEqual(None, html_body_declared_encoding(u"something else"))
+        self.assertEqual(None, html_body_declared_encoding("something else"))
 
         for fragment in self.utf8_fragments:
             encoding = html_body_declared_encoding(fragment.decode('utf8'))
             self.assertEqual(encoding, 'utf-8', fragment)
 
-        self.assertEqual(None, html_body_declared_encoding(u"""
+        self.assertEqual(None, html_body_declared_encoding("""
        <head></head>body
        this isn't searched
        <meta charset="utf-8">
        """))
         self.assertEqual(None, html_body_declared_encoding(
-            u"""<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">"""))
+            """<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">"""))
 
 
 class CodecsEncodingTestCase(unittest.TestCase):
@@ -95,10 +95,10 @@ def test_resolve_encoding(self):
 
 class UnicodeDecodingTestCase(unittest.TestCase):
 
     def test_utf8(self):
-        self.assertEqual(to_unicode(b'\xc2\xa3', 'utf-8'), u'\xa3')
+        self.assertEqual(to_unicode(b'\xc2\xa3', 'utf-8'), '\xa3')
 
     def test_invalid_utf8(self):
-        self.assertEqual(to_unicode(b'\xc2\xc2\xa3', 'utf-8'), u'\ufffd\xa3')
+        self.assertEqual(to_unicode(b'\xc2\xc2\xa3', 'utf-8'), '\ufffd\xa3')
 
 
 def ct(charset):
@@ -110,7 +110,7 @@ def norm_encoding(enc):
 
 class HtmlConversionTests(unittest.TestCase):
 
     def test_unicode_body(self):
-        unicode_string = u'\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442'
+        unicode_string = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442'
         original_string = unicode_string.encode('cp1251')
         encoding, body_unicode = html_to_unicode(ct('cp1251'), original_string)
         # check body_as_unicode
@@ -137,23 +137,23 @@ def test_content_type_and_conversion(self):
         """Test content type header is interpreted and text converted as
         expected
         """
-        self._assert_encoding('utf-8', b"\xc2\xa3", 'utf-8', u"\xa3")
+        self._assert_encoding('utf-8', b"\xc2\xa3", 'utf-8', "\xa3")
         # something like this in the scrapy tests - but that's invalid?
-        # self._assert_encoding('', "\xa3", 'utf-8', u"\xa3")
+        # self._assert_encoding('', "\xa3", 'utf-8', "\xa3")
         # iso-8859-1 is overridden to cp1252
-        self._assert_encoding('iso-8859-1', b"\xa3", 'cp1252', u"\xa3")
-        self._assert_encoding('', b"\xc2\xa3", 'utf-8', u"\xa3")
-        self._assert_encoding('none', b"\xc2\xa3", 'utf-8', u"\xa3")
-        self._assert_encoding('gb2312', b"\xa8D", 'gb18030', u"\u2015")
-        self._assert_encoding('gbk', b"\xa8D", 'gb18030', u"\u2015")
-        self._assert_encoding('big5', b"\xf9\xda", 'big5hkscs', u"\u6052")
+        self._assert_encoding('iso-8859-1', b"\xa3", 'cp1252', "\xa3")
+        self._assert_encoding('', b"\xc2\xa3", 'utf-8', "\xa3")
+        self._assert_encoding('none', b"\xc2\xa3", 'utf-8', "\xa3")
+        self._assert_encoding('gb2312', b"\xa8D", 'gb18030', "\u2015")
+        self._assert_encoding('gbk', b"\xa8D", 'gb18030', "\u2015")
+        self._assert_encoding('big5', b"\xf9\xda", 'big5hkscs', "\u6052")
 
     def test_invalid_utf8_encoded_body_with_valid_utf8_BOM(self):
         # unlike scrapy, the BOM is stripped
         self._assert_encoding('utf-8', b"\xef\xbb\xbfWORD\xe3\xabWORD2",
-                              'utf-8', u'WORD\ufffdWORD2')
+                              'utf-8', 'WORD\ufffdWORD2')
         self._assert_encoding(None, b"\xef\xbb\xbfWORD\xe3\xabWORD2",
-                              'utf-8', u'WORD\ufffdWORD2')
+                              'utf-8', 'WORD\ufffdWORD2')
 
     def test_utf8_unexpected_end_of_data_with_valid_utf8_BOM(self):
         # Python implementations handle unexpected end of UTF8 data
@@ -163,24 +163,24 @@ def test_utf8_unexpected_end_of_data_with_valid_utf8_BOM(self):
 
         # unlike scrapy, the BOM is stripped
         self._assert_encoding('utf-8', b"\xef\xbb\xbfWORD\xe3\xab",
-                              'utf-8', [u'WORD\ufffd\ufffd', u'WORD\ufffd'])
+                              'utf-8', ['WORD\ufffd\ufffd', 'WORD\ufffd'])
         self._assert_encoding(None, b"\xef\xbb\xbfWORD\xe3\xab",
-                              'utf-8', [u'WORD\ufffd\ufffd', u'WORD\ufffd'])
+                              'utf-8', ['WORD\ufffd\ufffd', 'WORD\ufffd'])
 
     def test_replace_wrong_encoding(self):
         """Test invalid chars are replaced properly"""
         encoding, body_unicode = html_to_unicode(ct('utf-8'),
             b'PREFIX\xe3\xabSUFFIX')
         # XXX: Policy for replacing invalid chars may suffer minor variations
-        # but it should always contain the unicode replacement char (u'\ufffd')
-        assert u'\ufffd' in body_unicode, repr(body_unicode)
-        assert u'PREFIX' in body_unicode, repr(body_unicode)
-        assert u'SUFFIX' in body_unicode, repr(body_unicode)
+        # but it should always contain the unicode replacement char ('\ufffd')
+        assert '\ufffd' in body_unicode, repr(body_unicode)
+        assert 'PREFIX' in body_unicode, repr(body_unicode)
+        assert 'SUFFIX' in body_unicode, repr(body_unicode)
 
         # Do not destroy html tags due to encoding bugs
         encoding, body_unicode = html_to_unicode(ct('utf-8'),
             b'\xf0<span>value</span>')
-        assert u'<span>value</span>' in body_unicode, repr(body_unicode)
+        assert '<span>value</span>' in body_unicode, repr(body_unicode)
 
     def _assert_encoding_detected(self, content_type, expected_encoding, body,
                                   **kwargs):
@@ -193,39 +193,39 @@ def test_BOM(self):
         # utf-16 cases already tested, as is the BOM detection function
 
         # http header takes precedence, irrespective of BOM
-        bom_be_str = codecs.BOM_UTF16_BE + u"hi".encode('utf-16-be')
-        expected = u'\ufffd\ufffd\x00h\x00i'
+        bom_be_str = codecs.BOM_UTF16_BE + "hi".encode('utf-16-be')
+        expected = '\ufffd\ufffd\x00h\x00i'
         self._assert_encoding('utf-8', bom_be_str, 'utf-8', expected)
 
         # BOM is stripped when it agrees with the encoding, or used to
         # determine encoding
         bom_utf8_str = codecs.BOM_UTF8 + b'hi'
-        self._assert_encoding('utf-8', bom_utf8_str, 'utf-8', u"hi")
-        self._assert_encoding(None, bom_utf8_str, 'utf-8', u"hi")
+        self._assert_encoding('utf-8', bom_utf8_str, 'utf-8', "hi")
+        self._assert_encoding(None, bom_utf8_str, 'utf-8', "hi")
 
     def test_utf16_32(self):
         # tools.ietf.org/html/rfc2781 section 4.3
 
         # USE BOM and strip it
-        bom_be_str = codecs.BOM_UTF16_BE + u"hi".encode('utf-16-be')
-        self._assert_encoding('utf-16', bom_be_str, 'utf-16-be', u"hi")
-        self._assert_encoding(None, bom_be_str, 'utf-16-be', u"hi")
+        bom_be_str = codecs.BOM_UTF16_BE + "hi".encode('utf-16-be')
+        self._assert_encoding('utf-16', bom_be_str, 'utf-16-be', "hi")
+        self._assert_encoding(None, bom_be_str, 'utf-16-be', "hi")
 
-        bom_le_str = codecs.BOM_UTF16_LE + u"hi".encode('utf-16-le')
-        self._assert_encoding('utf-16', bom_le_str, 'utf-16-le', u"hi")
-        self._assert_encoding(None, bom_le_str, 'utf-16-le', u"hi")
+        bom_le_str = codecs.BOM_UTF16_LE + "hi".encode('utf-16-le')
+        self._assert_encoding('utf-16', bom_le_str, 'utf-16-le', "hi")
+        self._assert_encoding(None, bom_le_str, 'utf-16-le', "hi")
 
-        bom_be_str = codecs.BOM_UTF32_BE + u"hi".encode('utf-32-be')
-        self._assert_encoding('utf-32', bom_be_str, 'utf-32-be', u"hi")
-        self._assert_encoding(None, bom_be_str, 'utf-32-be', u"hi")
+        bom_be_str = codecs.BOM_UTF32_BE + "hi".encode('utf-32-be')
+        self._assert_encoding('utf-32', bom_be_str, 'utf-32-be', "hi")
+        self._assert_encoding(None, bom_be_str, 'utf-32-be', "hi")
 
-        bom_le_str = codecs.BOM_UTF32_LE + u"hi".encode('utf-32-le')
-        self._assert_encoding('utf-32', bom_le_str, 'utf-32-le', u"hi")
-        self._assert_encoding(None, bom_le_str, 'utf-32-le', u"hi")
+        bom_le_str = codecs.BOM_UTF32_LE + "hi".encode('utf-32-le')
+        self._assert_encoding('utf-32', bom_le_str, 'utf-32-le', "hi")
+        self._assert_encoding(None, bom_le_str, 'utf-32-le', "hi")
 
         # if there is no BOM, big endian should be chosen
-        self._assert_encoding('utf-16', u"hi".encode('utf-16-be'), 'utf-16-be', u"hi")
-        self._assert_encoding('utf-32', u"hi".encode('utf-32-be'), 'utf-32-be', u"hi")
+        self._assert_encoding('utf-16', "hi".encode('utf-16-be'), 'utf-16-be', "hi")
+        self._assert_encoding('utf-32', "hi".encode('utf-32-be'), 'utf-32-be', "hi")
 
     def test_python_crash(self):
         import random
diff --git a/tests/test_form.py b/tests/test_form.py
index 280d8795..4a6d3052 100644
--- a/tests/test_form.py
+++ b/tests/test_form.py
@@ -23,20 +23,20 @@ def test_encode_multipart(self):
 
     def test_encode_multipart_unicode(self):
         data = OrderedDict([
-            (u'ключ1', u'значение1'.encode('utf8')),
-            (u'ключ2', u'значение2'),
+            ('ключ1', 'значение1'.encode('utf8')),
+            ('ключ2', 'значение2'),
         ])
         with warnings.catch_warnings(record=True):
             body, boundary = encode_multipart(data)
 
         expected_body = (
-            u'\r\n--{boundary}'
-            u'\r\nContent-Disposition: form-data; name="ключ1"\r\n'
-            u'\r\nзначение1'
-            u'\r\n--{boundary}'
-            u'\r\nContent-Disposition: form-data; name="ключ2"\r\n'
-            u'\r\nзначение2'
-            u'\r\n--{boundary}--'
-            u'\r\n'.format(boundary=boundary).encode('utf8')
+            '\r\n--{boundary}'
+            '\r\nContent-Disposition: form-data; name="ключ1"\r\n'
+            '\r\nзначение1'
+            '\r\n--{boundary}'
+            '\r\nContent-Disposition: form-data; name="ключ2"\r\n'
+            '\r\nзначение2'
+            '\r\n--{boundary}--'
+            '\r\n'.format(boundary=boundary).encode('utf8')
         )
         self.assertEqual(body, expected_body)
diff --git a/tests/test_html.py b/tests/test_html.py
index 89a651e4..d6a6c22b 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -18,37 +18,37 @@ def test_returns_unicode(self):
         # make sure it always return uncode
         assert isinstance(replace_entities(b'no entities'), str)
         assert isinstance(replace_entities(b'Price: &pound;100!'), str)
-        assert isinstance(replace_entities(u'no entities'), str)
-        assert isinstance(replace_entities(u'Price: &pound;100!'), str)
+        assert isinstance(replace_entities('no entities'), str)
+        assert isinstance(replace_entities('Price: &pound;100!'), str)
 
     def test_regular(self):
         # regular conversions
-        self.assertEqual(replace_entities(u'As low as &#163;100!'),
-                         u'As low as \xa3100!')
+        self.assertEqual(replace_entities('As low as &#163;100!'),
+                         'As low as \xa3100!')
         self.assertEqual(replace_entities(b'As low as &#163;100!'),
-                         u'As low as \xa3100!')
+                         'As low as \xa3100!')
         self.assertEqual(replace_entities('redirectTo=search&searchtext=MR0221Y&aff=buyat&affsrc=d_data&cm_mmc=buyat-_-ELECTRICAL & SEASONAL-_-MR0221Y-_-9-carat gold &frac12;oz solid crucifix pendant'),
-                         u'redirectTo=search&searchtext=MR0221Y&aff=buyat&affsrc=d_data&cm_mmc=buyat-_-ELECTRICAL & SEASONAL-_-MR0221Y-_-9-carat gold \xbdoz solid crucifix pendant')
+                         'redirectTo=search&searchtext=MR0221Y&aff=buyat&affsrc=d_data&cm_mmc=buyat-_-ELECTRICAL & SEASONAL-_-MR0221Y-_-9-carat gold \xbdoz solid crucifix pendant')
 
     def test_keep_entities(self):
         # keep some entities
-        self.assertEqual(replace_entities(b'Low &lt; High &amp; Medium &pound; six', keep=['lt', 'amp']),
-                         u'Low &lt; High &amp; Medium \xa3 six')
-        self.assertEqual(replace_entities(u'Low &lt; High &amp; Medium &pound; six', keep=[u'lt', u'amp']),
-                         u'Low &lt; High &amp; Medium \xa3 six')
+        self.assertEqual(replace_entities(b'Low &lt; High &amp; Medium &pound; six', keep=['lt', 'amp']),
+                         'Low &lt; High &amp; Medium \xa3 six')
+        self.assertEqual(replace_entities('Low &lt; High &amp; Medium &pound; six', keep=['lt', 'amp']),
+                         'Low &lt; High &amp; Medium \xa3 six')
 
     def test_illegal_entities(self):
         self.assertEqual(replace_entities('a &lt; b &illegal; c &#12345678; six', remove_illegal=False),
-                         u'a &lt; b &illegal; c &#12345678; six')
+                         'a &lt; b &illegal; c &#12345678; six')
         self.assertEqual(replace_entities('a &lt; b &illegal; c &#12345678; six', remove_illegal=True),
-                         u'a < b  c  six')
-        self.assertEqual(replace_entities('x&#x2264;y'), u'x\u2264y')
-        self.assertEqual(replace_entities('x&#157;y'), u'xy')
-        self.assertEqual(replace_entities('x&#157;y', remove_illegal=False), u'x&#157;y')
+                         'a < b  c  six')
+        self.assertEqual(replace_entities('x&#x2264;y'), 'x\u2264y')
+        self.assertEqual(replace_entities('x&#157;y'), 'xy')
+        self.assertEqual(replace_entities('x&#157;y', remove_illegal=False), 'x&#157;y')
 
     def test_browser_hack(self):
         # check browser hack for numeric character references in the 80-9F range
-        self.assertEqual(replace_entities('x&#153;y', encoding='cp1252'), u'x\u2122y')
-        self.assertEqual(replace_entities('x&#x99;y', encoding='cp1252'), u'x\u2122y')
+        self.assertEqual(replace_entities('x&#153;y', encoding='cp1252'), 'x\u2122y')
+        self.assertEqual(replace_entities('x&#x99;y', encoding='cp1252'), 'x\u2122y')
 
     def test_missing_semicolon(self):
         for entity, result in (
@@ -60,19 +60,19 @@ def test_missing_semicolon(self):
                 ('&#x41h', 'Ah',),
                 ('&#65!', 'A!',),
                 ('&#65x', 'Ax',),
-                ('&sup3!', u'\u00B3!',),
-                ('&Aacute!', u'\u00C1!',),
-                ('&#9731!', u'\u2603!',),
-                ('&#153', u'\u2122',),
-                ('&#x99', u'\u2122',),
+                ('&sup3!', '\u00B3!',),
+                ('&Aacute!', '\u00C1!',),
+                ('&#9731!', '\u2603!',),
+                ('&#153', '\u2122',),
+                ('&#x99', '\u2122',),
             ):
             self.assertEqual(replace_entities(entity, encoding='cp1252'), result)
-            self.assertEqual(replace_entities(u'x%sy' % entity, encoding='cp1252'), u'x%sy' % result)
+            self.assertEqual(replace_entities('x%sy' % entity, encoding='cp1252'), 'x%sy' % result)
 
     def test_encoding(self):
         self.assertEqual(replace_entities(b'x\x99&#153;&#8482;y', encoding='cp1252'), \
-                         u'x\u2122\u2122\u2122y')
+                         'x\u2122\u2122\u2122y')
 
 
 class ReplaceTagsTest(unittest.TestCase):
@@ -82,14 +82,14 @@ def test_returns_unicode(self):
         assert isinstance(replace_tags('no entities'), str)
 
     def test_replace_tags(self):
-        self.assertEqual(replace_tags(u'This text contains <a>some tag</a>'),
-                         u'This text contains some tag')
+        self.assertEqual(replace_tags('This text contains <a>some tag</a>'),
+                         'This text contains some tag')
         self.assertEqual(replace_tags(b'This text is very im<b>port</b>ant', ' '),
-                         u'This text is very im port ant')
+                         'This text is very im port ant')
 
     def test_replace_tags_multiline(self):
         self.assertEqual(replace_tags(b'Click <a class="one"\r\n href="url">here</a>'),
-                         u'Click here')
+                         'Click here')
 
 
 class RemoveCommentsTest(unittest.TestCase):
@@ -97,23 +97,23 @@ def test_returns_unicode(self):
         # make sure it always return unicode
         assert isinstance(remove_comments(b'without comments'), str)
         assert isinstance(remove_comments(b'<!-- with comments -->'), str)
-        assert isinstance(remove_comments(u'without comments'), str)
-        assert isinstance(remove_comments(u'<!-- with comments -->'), str)
+        assert isinstance(remove_comments('without comments'), str)
+        assert isinstance(remove_comments('<!-- with comments -->'), str)
 
     def test_no_comments(self):
         # text without comments
-        self.assertEqual(remove_comments(u'text without comments'), u'text without comments')
+        self.assertEqual(remove_comments('text without comments'), 'text without comments')
 
     def test_remove_comments(self):
         # text with comments
-        self.assertEqual(remove_comments(u'<!--text with comments-->'), u'')
-        self.assertEqual(remove_comments(u'Hello<!--World-->'), u'Hello')
-        self.assertEqual(remove_comments(u'Hello<!--My\nWorld-->'), u'Hello')
+        self.assertEqual(remove_comments('<!--text with comments-->'), '')
+        self.assertEqual(remove_comments('Hello<!--World-->'), 'Hello')
+        self.assertEqual(remove_comments('Hello<!--My\nWorld-->'), 'Hello')
 
-        self.assertEqual(remove_comments(b"test <!--textcoment--> whatever"), u'test  whatever')
-        self.assertEqual(remove_comments(b"test <!--\ntextcoment\n--> whatever"), u'test  whatever')
+        self.assertEqual(remove_comments(b"test <!--textcoment--> whatever"), 'test  whatever')
+        self.assertEqual(remove_comments(b"test <!--\ntextcoment\n--> whatever"), 'test  whatever')
diff --git a/w3lib/html.py b/w3lib/html.py
index cbb1a9b0..bc92de39 100644
--- a/w3lib/html.py
+++ b/w3lib/html.py
-_REMOVECOMMENTS_RE = re.compile(u'<!--.*?(?:-->|$)', re.DOTALL)
+_REMOVECOMMENTS_RE = re.compile('<!--.*?(?:-->|$)', re.DOTALL)
 
 def remove_comments(text, encoding=None):
     """ Remove HTML Comments.
 
        >>> import w3lib.html
        >>> w3lib.html.remove_comments(b"test <!--textcoment--> whatever")
-       u'test  whatever'
+       'test  whatever'
        >>>
 
     """
 
     text = to_unicode(text, encoding)
-    return _REMOVECOMMENTS_RE.sub(u'', text)
+    return _REMOVECOMMENTS_RE.sub('', text)
 
@@ -158,19 +158,19 @@ def remove_tags(text, which_ones=(), keep=(), encoding=None):
     """ Remove HTML Tags only.
 
        >>> import w3lib.html
        >>> doc = '<div><p><b>This is a link:</b> <a href="http://www.example.com">example</a></p></div>'
        >>> w3lib.html.remove_tags(doc)
-       u'This is a link: example'
+       'This is a link: example'
        >>>
 
        Keep only some tags:
 
        >>> w3lib.html.remove_tags(doc, keep=('div',))
-       u'<div>This is a link: example</div>'
+       '<div>This is a link: example</div>'
        >>>
 
        Remove only specific tags:
 
        >>> w3lib.html.remove_tags(doc, which_ones=('a','b'))
-       u'<div><p>This is a link: example</p></div>'
+       '<div><p>This is a link: example</p></div>'
        >>>
 
        You can't remove some and keep some:
@@ -197,7 +197,7 @@ def will_remove(tag):
 
     def remove_tag(m):
         tag = m.group(1)
-        return u'' if will_remove(tag) else m.group(0)
+        return '' if will_remove(tag) else m.group(0)
 
     regex = '</?([^ >/]+).*?>'
     retags = re.compile(regex, re.DOTALL | re.IGNORECASE)
@@ -213,7 +213,7 @@ def remove_tags_with_content(text, which_ones=(), encoding=None):
 
        >>> import w3lib.html
        >>> doc = '<div><p><b>This is a link:</b> <a href="http://www.example.com">example</a></p></div>'
        >>> w3lib.html.remove_tags_with_content(doc, which_ones=('b',))
-       u'<div><p> <a href="http://www.example.com">example</a></p></div>'
+       '<div><p> <a href="http://www.example.com">example</a></p></div>'
        >>>
 
     """
@@ -222,11 +222,11 @@ def remove_tags_with_content(text, which_ones=(), encoding=None):
     if which_ones:
         tags = '|'.join([r'<%s\b.*?</%s>|<%s\s*/>' % (tag, tag, tag) for tag in which_ones])
         retags = re.compile(tags, re.DOTALL | re.IGNORECASE)
-        text = retags.sub(u'', text)
+        text = retags.sub('', text)
     return text
 
 
-def replace_escape_chars(text, which_ones=('\n', '\t', '\r'), replace_by=u'', \
+def replace_escape_chars(text, which_ones=('\n', '\t', '\r'), replace_by='', \
         encoding=None):
     """Remove escape characters.
 
@@ -265,7 +265,7 @@ def _get_fragments(txt, pattern):
         yield txt[offset:]
 
     text = to_unicode(text, encoding)
-    ret_text = u''
+    ret_text = ''
     for fragment in _get_fragments(text, _cdata_re):
         if isinstance(fragment, str):
             # it's not a CDATA (so we try to remove its entities)
             ret_text += replace_entities(fragment, keep=keep, remove_illegal=remove_illegal)
         else:
diff --git a/w3lib/url.py b/w3lib/url.py
index d27dbd52..e0624228 100644
--- a/w3lib/url.py
+++ b/w3lib/url.py
@@ -456,7 +456,7 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
     'http://www.example.com/do?a=50&b=2&b=5&c=3'
     >>>
 
     >>> # UTF-8 conversion + percent-encoding of non-ASCII characters
-    >>> w3lib.url.canonicalize_url(u'http://www.example.com/r\u00e9sum\u00e9')
+    >>> w3lib.url.canonicalize_url('http://www.example.com/r\u00e9sum\u00e9')
     'http://www.example.com/r%C3%A9sum%C3%A9'
     >>>
@@ -602,12 +602,12 @@ def urljoin_rfc(base, ref, encoding='utf-8'):
     Always returns a str.
 
     >>> import w3lib.url
-    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'/otherpath/index2.html')
+    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', '/otherpath/index2.html')
     'http://www.example.com/otherpath/index2.html'
     >>>
 
     >>> # Note: the following does not work in Python 3
-    >>> w3lib.url.urljoin_rfc(b'http://www.example.com/path/index.html', u'fran\u00e7ais/d\u00e9part.htm') # doctest: +SKIP
+    >>> w3lib.url.urljoin_rfc(b'http://www.example.com/path/index.html', 'fran\u00e7ais/d\u00e9part.htm') # doctest: +SKIP
     'http://www.example.com/path/fran\xc3\xa7ais/d\xc3\xa9part.htm'
     >>>

From d5750880b6da8abb33d17d723b05b9a60c150ef1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 24 Mar 2021 11:43:24 +0100
Subject: [PATCH 7/8] Remove file encoding comments

---
 docs/conf.py       | 2 --
 tests/test_form.py | 1 -
 tests/test_http.py | 2 --
 w3lib/encoding.py  | 1 -
 w3lib/html.py      | 1 -
 5 files changed, 7 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index d79efcf4..eb57263a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
 # w3lib documentation build configuration file, created by
 # sphinx-quickstart on Sun Jan 26 22:19:38 2014.
 #
diff --git a/tests/test_form.py b/tests/test_form.py
index 4a6d3052..93ddab4e 100644
--- a/tests/test_form.py
+++ b/tests/test_form.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 from __future__ import absolute_import
 import warnings
 import unittest
diff --git a/tests/test_http.py b/tests/test_http.py
index 8b934c96..67dddd3e 100644
--- a/tests/test_http.py
+++ b/tests/test_http.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 import unittest
 from collections import OrderedDict
 from w3lib.http import (basic_auth_header,
diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index d96bd160..4407e789 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 Functions for handling encoding of web pages
 """
diff --git a/w3lib/html.py b/w3lib/html.py
index bc92de39..9f7dbf0d 100644
--- a/w3lib/html.py
+++ b/w3lib/html.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 Functions for dealing with markup text
 """

From 0579cade9c8e5725457fb4ceef3c3153f3824307 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Wed, 24 Mar 2021 11:44:04 +0100
Subject: [PATCH 8/8] Remove from __future__ import absolute_import

---
 tests/test_form.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_form.py b/tests/test_form.py
index 93ddab4e..ac0696b5 100644
--- a/tests/test_form.py
+++ b/tests/test_form.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import
 import warnings
 import unittest
 from collections import OrderedDict