Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New feature: auto detect URI scheme #983

Merged
merged 4 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## [0.4.3]
- Automatically detect the URI scheme (http or https) if no scheme is provided

## [0.4.2] - 2021.9.12
- More accurate
- Exclude responses by redirects
Expand Down
3 changes: 2 additions & 1 deletion default.conf
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,12 @@ follow-redirects = False
[connection]
timeout = 5
delay = 0
scheme = http
maxrate = 0
retries = 2
request-by-hostname = False
exit-on-error = False
# scheme = http
## When the `scheme` variable is disabled (commented out), dirsearch will automatically detect the URI scheme
# proxy = localhost:8080
# proxy-list = proxies.txt
# replay-proxy = localhost:8000
103 changes: 68 additions & 35 deletions lib/connection/requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import http.client
import random
import socket
import ssl
import thirdparty.requests as requests

from urllib.parse import urlparse, urljoin
Expand Down Expand Up @@ -56,54 +57,52 @@ def __init__(

parsed = urlparse(url)

# If no protocol specified, set http by default
# If no scheme specified, unset it first
if "://" not in url:
parsed = urlparse("{0}://{1}".format(scheme, url))

# If protocol is not supported
elif parsed.scheme not in ["https", "http"]:
raise RequestException({"message": "Unsupported URL scheme: {0}".format(parsed.scheme)})
parsed = urlparse("{0}://{1}".format(scheme or "unknown", url))

self.base_path = parsed.path
if parsed.path.startswith("/"):
self.base_path = parsed.path[1:]

# Safe quote all special characters in base_path to prevent from being encoded
self.base_path = safequote(self.base_path)
self.protocol = parsed.scheme
self.host = parsed.netloc.split(":")[0]

# Resolve DNS to decrease overhead
if ip:
self.ip = ip
# A proxy could have a different DNS that would resolve the name. Therefore,
# resolving the name when using a proxy just to raise an error is pointless
elif not proxy and not proxylist:
try:
self.ip = socket.gethostbyname(self.host)
except socket.gaierror:
# Check if hostname resolves to IPv6 address only
try:
self.ip = socket.getaddrinfo(self.host, None, socket.AF_INET6)[0][4][0]
except socket.gaierror:
raise RequestException({"message": "Couldn't resolve DNS"})
port_for_scheme = {"http": 80, "https": 443, "unknown": 0}

# If no port specified, set default (80, 443)
try:
self.port = int(parsed.netloc.split(":")[1])
except IndexError:
self.port = 443 if self.protocol == "https" else 80
self.port = port_for_scheme[parsed.scheme]
except ValueError:
raise RequestException(
{"message": "Invalid port number: {0}".format(parsed.netloc.split(":")[1])}
)

# Set the Host header, this will be overwritten if the user has already set the header
# If no scheme is found, detect it by port number
self.scheme = parsed.scheme if parsed.scheme != "unknown" else self.get_scheme(self.port)

# If the user provides neither the port nor the scheme, guess them based
# on standard website characteristics
if not self.scheme:
if self.get_scheme(443) == "https":
self.port = 443
self.scheme = "https"
else:
self.port = 80
self.scheme = "http"
# If the scheme is not supported
elif self.scheme not in ["https", "http"]:
raise RequestException({"message": "Unsupported URI scheme: {0}".format(self.scheme)})

# Set the Host header, read the line 126 to know why
self.headers["Host"] = self.host

# Include port in Host header if it's non-standard
if (self.protocol == "https" and self.port != 443) or (
self.protocol == "http" and self.port != 80
if (self.scheme == "https" and self.port != 443) or (
self.scheme == "http" and self.port != 80
):
self.headers["Host"] += ":{0}".format(self.port)

Expand All @@ -117,19 +116,53 @@ def __init__(
self.random_agents = None
self.auth = None
self.request_by_hostname = request_by_hostname
self.session = requests.Session()
self.url = "{0}://{1}:{2}/".format(
self.protocol,
self.host if self.request_by_hostname else self.ip,
self.port,
)
self.base_url = "{0}://{1}:{2}/".format(
self.protocol,
self.host,
self.port,
self.ip = ip
self.base_url = self.url = "{0}://{1}/".format(
self.scheme,
self.headers["Host"],
)

def setup(self):
    """Resolve the target address (when applicable) and create the HTTP session.

    Unless requests are sent by hostname or go through a proxy, the hostname
    is resolved once here and ``self.url`` is rebuilt around the resolved IP.
    A fresh ``requests.Session`` is then created and ``set_adapter`` is called.

    Raises:
        RequestException: if the hostname resolves to neither an IPv4 nor an
            IPv6 address.
    """
    # To improve dirsearch performance, we resolve the hostname before scanning
    # and then send requests by IP instead of hostname, so the library won't have to
    # resolve it before every request. This also keeps the scan stable despite any
    # issue with the system DNS resolver (running tools like Amass might cause such
    # things). If you don't like it, you can disable it with `-b` command-line flag
    #
    # Note: A proxy could have a different DNS that would resolve the name. Therefore,
    # resolving the name when using a proxy just to raise an error is pointless
    if not self.request_by_hostname and not self.proxy and not self.proxylist:
        try:
            # Keep a caller-supplied IP if there is one; otherwise do an IPv4 lookup
            self.ip = self.ip or socket.gethostbyname(self.host)
        except socket.gaierror:
            # Check if hostname resolves to IPv6 address only
            try:
                self.ip = socket.getaddrinfo(self.host, None, socket.AF_INET6)[0][4][0]
            except socket.gaierror:
                raise RequestException({"message": "Couldn't resolve DNS"})

        # An IPv6 literal must be enclosed in brackets inside a URL (RFC 3986),
        # otherwise its colons are ambiguous with the port separator
        address = self.ip
        if ":" in address and not address.startswith("["):
            address = "[{0}]".format(address)

        self.url = "{0}://{1}:{2}/".format(
            self.scheme,
            address,
            self.port,
        )

    self.session = requests.Session()
    self.set_adapter()

def get_scheme(self, port, timeout=5):
    """Guess the URI scheme served on ``port`` of ``self.host``.

    A TLS handshake is attempted against the port; certificate validation is
    disabled on purpose because only the ability to speak TLS matters here.

    Args:
        port: TCP port to probe. ``0`` means "unknown port".
        timeout: seconds to wait for the connection and the handshake, so a
            filtered port cannot stall the probe indefinitely.

    Returns:
        ``None`` when ``port`` is 0, ``"https"`` when the TLS handshake
        succeeds, and ``"http"`` in every other case (including when the
        port cannot be reached at all).
    """
    if port == 0:
        return None

    # Explicit client protocol (the bare SSLContext() form is deprecated);
    # check_hostname must be switched off before verify_mode can be CERT_NONE
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    try:
        # create_connection handles both IPv4 and IPv6 targets, and the
        # context managers guarantee the sockets are closed on every path
        with socket.create_connection((self.host, port), timeout=timeout) as raw_sock:
            # Wrapping a connected socket performs the handshake immediately;
            # server_hostname sends SNI so name-based virtual hosts answer too
            with context.wrap_socket(raw_sock, server_hostname=self.host):
                pass
        return "https"
    except Exception:
        # Best-effort probe: any failure (refused, timeout, non-TLS reply,
        # invalid host/port) is treated as "not HTTPS"
        return "http"

def set_adapter(self):
self.session.mount(self.url, HTTPAdapter(max_retries=self.max_retries))

Expand Down
7 changes: 4 additions & 3 deletions lib/controller/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def __init__(self, script_path, arguments, output):
self.pass_dirs = ["/"]

if arguments.raw_file:
raw = Raw(arguments.raw_file, arguments.scheme)
raw = Raw(arguments.raw_file)
self.url_list = [raw.url]
self.httpmethod = raw.method
self.data = raw.body
Expand Down Expand Up @@ -211,7 +211,6 @@ def __init__(self, script_path, arguments, output):
try:
gc.collect()
url = url if url.endswith("/") else url + "/"
self.output.set_target(url, arguments.scheme)

try:
self.requester = Requester(
Expand All @@ -228,6 +227,8 @@ def __init__(self, script_path, arguments, output):
data=self.data,
scheme=arguments.scheme,
)
self.output.set_target(self.requester.base_url)
self.requester.setup()

for key, value in self.headers.items():
self.requester.set_header(key, value)
Expand All @@ -239,7 +240,7 @@ def __init__(self, script_path, arguments, output):
self.requester.request("")

if arguments.autosave_report or arguments.output_file:
self.report = Report(self.requester.host, self.requester.port, self.requester.protocol, self.requester.base_path)
self.report = Report(self.requester.host, self.requester.port, self.requester.scheme, self.requester.base_path)

except RequestException as e:
self.output.error(e.args[0]["message"])
Expand Down
8 changes: 2 additions & 6 deletions lib/core/argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,6 @@ def __init__(self, script_path):

self.recursion_depth = options.recursion_depth

if self.scheme not in ["http", "https"]:
print("Invalid URI scheme: {0}".format(self.scheme))
exit(1)

if self.output_format and self.output_format not in ["simple", "plain", "json", "xml", "md", "csv", "html"]:
print("Select one of the following output formats: simple, plain, json, xml, md, csv, html")
exit(1)
Expand Down Expand Up @@ -394,7 +390,7 @@ def parse_config(self):
self.maxrate = config.safe_getint("connection", "max-rate", 0)
self.proxy = config.safe_get("connection", "proxy", None)
self.proxylist = config.safe_get("connection", "proxy-list", None)
self.scheme = config.safe_get("connection", "scheme", "http", ["http", "https"])
self.scheme = config.safe_get("connection", "scheme", None, ["http", "https"])
self.replay_proxy = config.safe_get("connection", "replay-proxy", None)
self.request_by_hostname = config.safe_getboolean(
"connection", "request-by-hostname", False
Expand Down Expand Up @@ -526,7 +522,7 @@ def parse_arguments(self):
default=self.proxylist, help="File contains proxy servers", metavar="FILE")
connection.add_option("--replay-proxy", action="store", dest="replay_proxy", type="string", default=self.replay_proxy,
help="Proxy to replay with found paths", metavar="PROXY")
connection.add_option("--scheme", help="Default scheme (for raw request or if there is no scheme in the URL)", action="store",
connection.add_option("--scheme", help="Default scheme for raw request or if there is no scheme in the URL (Default: auto-detect)", action="store",
default=self.scheme, dest="scheme", metavar="SCHEME")
connection.add_option("--max-rate", help="Max requests per second", action="store", dest="maxrate",
type="int", default=self.maxrate, metavar="RATE")
Expand Down
6 changes: 2 additions & 4 deletions lib/core/raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,9 @@


class Raw(object):
def __init__(self, raw_file, scheme):
def __init__(self, raw_file):
with File(raw_file) as raw_content:
self.raw_content = raw_content.read()

self.scheme = scheme
self.parse()

def parse(self):
Expand Down Expand Up @@ -57,7 +55,7 @@ def parse(self):

@property
def url(self):
return "{0}://{1}{2}".format(self.scheme, self.host, self.path)
return "{0}{1}".format(self.host, self.path)

@property
def method(self):
Expand Down
5 changes: 1 addition & 4 deletions lib/output/silent_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,7 @@ def config(
):
pass

def set_target(self, target, scheme):
if not target.startswith("http://") and not target.startswith("https://") and "://" not in target:
target = "{0}://{1}".format(scheme, target)

def set_target(self, target):
self.target = target

def output_file(self, target):
Expand Down
6 changes: 1 addition & 5 deletions lib/output/verbose_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,8 @@ def config(

self.print_header(config)

def set_target(self, target, scheme):
if not target.startswith(("http://", "https://")) and "://" not in target:
target = "{0}://{1}".format(scheme, target)

def set_target(self, target):
self.target = target

self.new_line()
self.print_header({"Target": target})
self.new_line()
Expand Down