Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Packs/UrlScan/Integrations/UrlScan/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
<h5>Command Example</h5>
<p><code>!urlscan-search searchParameter=8.8.8.8</code></p>
<p><code>!urlscan-search searchType=advanced searchParameter="filename:logo.png AND date:>now-24h"</code></p>
<p><code>!urlscan-search searchType=raw searchParameter="q=meta%3Asearchhit.search.04eb755f-468d-4421-ab86-210a01ee1bdd&datasource=hostnames&search_after="</code></p>
<h3 id="h_872696191351541575062805">2. (Deprecated) Submit a URL directly to urlscan.io</h3>
<hr>
<p>Submits a URL to urlscan.io.</p>
Expand Down
233 changes: 122 additions & 111 deletions Packs/UrlScan/Integrations/UrlScan/UrlScan.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,8 @@ def urlscan_search_only(client: Client, url: str, command_results: list, executi
def urlscan_search(client, search_type, query, size=None):
if search_type == "advanced":
r, _, _ = http_request(client, "GET", "search/?q=" + query)
elif search_type == "raw":
r, _, _ = http_request(client, "GET", f"search/?{query}")
else:
url_suffix = "search/?q=" + search_type + ':"' + query + '"' + (f"&size={size}" if size else "")
r, _, _ = http_request(client, "GET", url_suffix)
Expand Down Expand Up @@ -699,118 +701,127 @@ def urlscan_search_command(client):
search_type = "hash"
else:
search_type = "page.url"
if search_type == "raw":
r = urlscan_search(client, search_type, raw_query)
results = CommandResults(
outputs_prefix="URLScan.Search.Results",
raw_response=r,
outputs=r["results"],
readable_output=f'{r["total"]} results found for {raw_query}',
)
return_results(results)
else:
# Making the query string safe for Elastic Search
query = quote(raw_query, safe="")

r = urlscan_search(client, search_type, query)

if r["total"] == 0:
demisto.results(f"No results found for {raw_query}")
return
if r["total"] > 0:
demisto.results("{} results found for {}".format(r["total"], raw_query))

# Opening empty string for url comparison
last_url = ""
hr_md = []
cont_array = []
ip_array = []
dom_array = []
url_array = []

for res in r["results"][:LIMIT]:
ec = makehash()
cont = makehash()
url_cont = makehash()
ip_cont = makehash()
dom_cont = makehash()
file_context = makehash()
res_dict = res
res_tasks = res_dict["task"]
res_page = res_dict["page"]

if last_url == res_tasks["url"]:
continue

# Making the query string safe for Elastic Search
query = quote(raw_query, safe="")

r = urlscan_search(client, search_type, query)

if r["total"] == 0:
demisto.results(f"No results found for {raw_query}")
return
if r["total"] > 0:
demisto.results("{} results found for {}".format(r["total"], raw_query))

# Opening empty string for url comparison
last_url = ""
hr_md = []
cont_array = []
ip_array = []
dom_array = []
url_array = []

for res in r["results"][:LIMIT]:
ec = makehash()
cont = makehash()
url_cont = makehash()
ip_cont = makehash()
dom_cont = makehash()
file_context = makehash()
res_dict = res
res_tasks = res_dict["task"]
res_page = res_dict["page"]

if last_url == res_tasks["url"]:
continue

human_readable = makehash()

if "url" in res_tasks:
url = res_tasks["url"]
human_readable["URL"] = url
cont["URL"] = url
url_cont["Data"] = url
if "domain" in res_page:
domain = res_page["domain"]
human_readable["Domain"] = domain
cont["Domain"] = domain
dom_cont["Name"] = domain
if "asn" in res_page:
asn = res_page["asn"]
cont["ASN"] = asn
ip_cont["ASN"] = asn
human_readable["ASN"] = asn
if "ip" in res_page:
ip = res_page["ip"]
cont["IP"] = ip
ip_cont["Address"] = ip
human_readable["IP"] = ip
if "_id" in res_dict:
scanID = res_dict["_id"]
cont["ScanID"] = scanID
human_readable["Scan ID"] = scanID
if "time" in res_tasks:
scanDate = res_tasks["time"]
cont["ScanDate"] = scanDate
human_readable["Scan Date"] = scanDate
if "files" in res_dict:
HUMAN_READBALE_HEADERS = ["URL", "Domain", "IP", "ASN", "Scan ID", "Scan Date", "File"]
files = res_dict["files"][0]
sha256 = files.get("sha256")
filename = files.get("filename")
filesize = files.get("filesize")
filetype = files.get("mimeType")
url = res_tasks["url"]
if sha256:
human_readable["File"]["Hash"] = sha256
cont["Hash"] = sha256
file_context["SHA256"] = sha256
if filename:
human_readable["File"]["Name"] = filename
cont["FileName"] = filename
file_context["File"]["Name"] = filename
if filesize:
human_readable["File"]["Size"] = filesize
cont["FileSize"] = filesize
file_context["Size"] = filesize
if filetype:
human_readable["File"]["Type"] = filetype
cont["FileType"] = filetype
file_context["File"]["Type"] = filetype
file_context["File"]["Hostname"] = url

ec[outputPaths["file"]] = file_context
hr_md.append(human_readable)
cont_array.append(cont)
ip_array.append(ip_cont)
url_array.append(url_cont)
dom_array.append(dom_cont)

# Storing last url in memory for comparison on next loop
last_url = url

ec = {"URLScan(val.URL && val.URL == obj.URL)": cont_array, "URL": url_array, "IP": ip_array, "Domain": dom_array}
demisto.results(
{
"Type": entryTypes["note"],
"ContentsFormat": formats["markdown"],
"Contents": r,
"HumanReadable": tableToMarkdown(
f"URLScan.io query results for {raw_query}", hr_md, HUMAN_READBALE_HEADERS, removeNull=True
),
"EntryContext": ec,
}
)
human_readable = makehash()

if "url" in res_tasks:
url = res_tasks["url"]
human_readable["URL"] = url
cont["URL"] = url
url_cont["Data"] = url
if "domain" in res_page:
domain = res_page["domain"]
human_readable["Domain"] = domain
cont["Domain"] = domain
dom_cont["Name"] = domain
if "asn" in res_page:
asn = res_page["asn"]
cont["ASN"] = asn
ip_cont["ASN"] = asn
human_readable["ASN"] = asn
if "ip" in res_page:
ip = res_page["ip"]
cont["IP"] = ip
ip_cont["Address"] = ip
human_readable["IP"] = ip
if "_id" in res_dict:
scanID = res_dict["_id"]
cont["ScanID"] = scanID
human_readable["Scan ID"] = scanID
if "time" in res_tasks:
scanDate = res_tasks["time"]
cont["ScanDate"] = scanDate
human_readable["Scan Date"] = scanDate
if "files" in res_dict:
HUMAN_READBALE_HEADERS = ["URL", "Domain", "IP", "ASN", "Scan ID", "Scan Date", "File"]
files = res_dict["files"][0]
sha256 = files.get("sha256")
filename = files.get("filename")
filesize = files.get("filesize")
filetype = files.get("mimeType")
url = res_tasks["url"]
if sha256:
human_readable["File"]["Hash"] = sha256
cont["Hash"] = sha256
file_context["SHA256"] = sha256
if filename:
human_readable["File"]["Name"] = filename
cont["FileName"] = filename
file_context["File"]["Name"] = filename
if filesize:
human_readable["File"]["Size"] = filesize
cont["FileSize"] = filesize
file_context["Size"] = filesize
if filetype:
human_readable["File"]["Type"] = filetype
cont["FileType"] = filetype
file_context["File"]["Type"] = filetype
file_context["File"]["Hostname"] = url

ec[outputPaths["file"]] = file_context
hr_md.append(human_readable)
cont_array.append(cont)
ip_array.append(ip_cont)
url_array.append(url_cont)
dom_array.append(dom_cont)

# Storing last url in memory for comparison on next loop
last_url = url

ec = {"URLScan(val.URL && val.URL == obj.URL)": cont_array, "URL": url_array, "IP": ip_array, "Domain": dom_array}
demisto.results(
{
"Type": entryTypes["note"],
"ContentsFormat": formats["markdown"],
"Contents": r,
"HumanReadable": tableToMarkdown(
f"URLScan.io query results for {raw_query}", hr_md, HUMAN_READBALE_HEADERS, removeNull=True
),
"EntryContext": ec,
}
)


def format_http_transaction_list(client):
Expand Down
3 changes: 3 additions & 0 deletions Packs/UrlScan/Integrations/UrlScan/UrlScan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ script:
description: The search type. When set to advanced, allows querying multiple search parameters; when set to raw, the search parameter is passed to the search API as-is.
predefined:
- advanced
- raw
name: searchType
- defaultValue: '20'
description: The maximum number of results to return. Default is 20.
Expand Down Expand Up @@ -167,6 +168,8 @@ script:
- contextPath: URLScan.FileType
description: File type of the file scanned.
type: string
- contextPath: URLScan.Search.Results
description: The unformatted search results from a raw search.
polling: true
- arguments:
- description: The URL to scan.
Expand Down
5 changes: 5 additions & 0 deletions Packs/UrlScan/ReleaseNotes/1_2_20.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

#### Integrations

##### urlscan.io
- Updated the urlscan.io integration to support other types of searches via the search API.
2 changes: 1 addition & 1 deletion Packs/UrlScan/pack_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "URLScan.io",
"description": "urlscan.io Web Threat Intelligence",
"support": "partner",
"currentVersion": "1.2.19",
"currentVersion": "1.2.20",
"author": "urlscan GmbH",
"url": "https://urlscan.io",
"email": "[email protected]",
Expand Down
Loading