Skip to content

Commit

Permalink
[submodule:extensions] Add some new bots: 360Spider, `Archive.org Bots`, `CCBot`, `DataForSeoBot`, `DuckAssistBot`, `Exabot`, `Google Bots`, `Meta Bots`, `MojeekBot`, `PerplexityBot`, `PetalBot`, `TurnitinBot`, `Yeti`, `YisouSpider`
Browse files Browse the repository at this point in the history
  • Loading branch information
faisalman committed Nov 10, 2024
1 parent 3b3361f commit a0e11b7
Show file tree
Hide file tree
Showing 3 changed files with 415 additions and 12 deletions.
56 changes: 44 additions & 12 deletions src/extensions/ua-parser-extensions.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,26 +44,36 @@ const Crawlers = Object.freeze({
// Amazonbot - https://developer.amazon.com/amazonbot
// Applebot - http://apple.com/go/applebot
// Bingbot - http://www.bing.com/bingbot.htm
// CCBot - https://commoncrawl.org/faq
// Dotbot - https://moz.com/help/moz-procedures/crawlers/dotbot
// DuckDuckBot - http://duckduckgo.com/duckduckbot.html
// FacebookBot - https://developers.facebook.com/docs/sharing/bot/
// GPTBot - https://platform.openai.com/docs/gptbot
// MJ12bot - https://mj12bot.com/
// OpenAI Search - https://platform.openai.com/docs/bots
// MojeekBot - https://www.mojeek.com/bot.html
// OpenAI's SearchGPT - https://platform.openai.com/docs/bots
// PerplexityBot - https://perplexity.ai/perplexitybot
// SemrushBot - http://www.semrush.com/bot.html
/((?:ahrefs|amazon|apple|bing|dot|duckduck|facebook|gpt|mj12|oai-search|semrush)bot)\/([\w\.]+)/i,
/((?:ahrefs|amazon|apple|bing|cc|dot|duckduck|exa|facebook|gpt|mj12|mojeek|oai-search|perplexity|semrush)bot)\/([\w\.]+)/i,

// Baiduspider - https://help.baidu.com/question?prod_id=99&class=0&id=3001
/(baiduspider)[-imagevdonsfcpr]{0,6}\/([\w\.]+)/i,

// ClaudeBot
// ClaudeBot (Anthropic)
/(claude(?:bot|-web))\/([\w\.]+)/i,

// Coc Coc Bot - https://help.coccoc.com/en/search-engine
/(coccocbot-(?:image|web))\/([\w\.]+)/i,

// Facebook / Meta
// https://developers.facebook.com/docs/sharing/webmasters/web-crawlers
/(facebook(?:externalhit|catalog)|meta-externalagent)\/([\w\.]+)/i,

// Googlebot - http://www.google.com/bot.html
/(google(?:bot|other)(?:-image|-video|-news|-extended)?|(?:storebot-)?google(?:-inspectiontool)?)\/?([\w\.]*)/i,
/(google(?:bot|other|-inspectiontool)(?:-image|-video|-news)?|storebot-google)\/?([\w\.]*)/i,

// Internet Archive (archive.org)
/(ia_archiver|archive\.org_bot)\/?([\w\.]*)/i,

// Sogou Spider
/(sogou (?:pic|head|web|orion|news) spider)\/([\w\.]+)/i,
Expand All @@ -72,14 +82,29 @@ const Crawlers = Object.freeze({
/(y!?j-(?:asr|br[uw]|dscv|mmp|vsidx|wsc))\/([\w\.]+)/i,

// Yandex Bots - https://yandex.com/bots
/(yandex(?:(?:mobile)?(?:accessibility|additional|renderresources|screenshot|sprav)?bot|image(?:s|resizer)|video(?:parser)?|blogs|adnet|favicons|fordomain|market|media|metrika|news|ontodb(?:api)?|pagechecker|partner|rca|tracker|turbo|vertis|webmaster|antivirus))\/([\w\.]+)/i
/(yandex(?:(?:mobile)?(?:accessibility|additional|renderresources|screenshot|sprav)?bot|image(?:s|resizer)|video(?:parser)?|blogs|adnet|favicons|fordomain|market|media|metrika|news|ontodb(?:api)?|pagechecker|partner|rca|tracker|turbo|vertis|webmaster|antivirus))\/([\w\.]+)/i,

// Yeti (Naver)
/(yeti)\/([\w\.]+)/i,

// YisouSpider
/(yisouspider)\/?([\w\.]*)/i
],

[NAME, VERSION, [TYPE, CRAWLER]],

// Bytespider
// Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp
[/((?:bytespider|(?=yahoo! )slurp))/i],
[
// Google Bots
/((?:adsbot|apis|mediapartners)-google(?:-mobile)?|google-?(?:other|cloudvertexbot|extended|safety))/i,

// Bytespider
// DataForSeoBot - https://dataforseo.com/dataforseo-bot
// Huawei AspiegelBot / PetalBot - https://aspiegel.com/petalbot
// Qihoo 360Spider
// TurnitinBot - https://www.turnitin.com/robot/crawlerinfo.html
// Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp
/(360spider-?(?:image|video)?|bytespider|(?:aspiegel|dataforseo|petal|turnitin)bot|(?=yahoo! )slurp)/i
],
[NAME, [TYPE, CRAWLER]]
]
});
Expand Down Expand Up @@ -184,8 +209,15 @@ const Fetchers = Object.freeze({
[
// AhrefsSiteAudit - https://ahrefs.com/robot/site-audit
// ChatGPT-User - https://platform.openai.com/docs/plugins/bot
// DuckAssistBot - https://duckduckgo.com/duckassistbot/
// BingPreview / Discordbot / LinkedInBot / Mastodon / Pinterestbot / Redditbot / Rogerbot / Telegrambot / Twitterbot / UptimeRobot
/(ahrefssiteaudit|bingpreview|chatgpt-user|mastodon|(?:discord|linkedin|pinterest|reddit|roger|telegram|twitter|uptimero)bot)\/([\w\.]+)/i,
/(ahrefssiteaudit|bingpreview|chatgpt-user|mastodon|(?:discord|duckassist|linkedin|pinterest|reddit|roger|telegram|twitter|uptimero)bot)\/([\w\.]+)/i,

// Google Site Verifier
/(google-site-verification)\/([\w\.]+)/i,

// Meta
/(meta-externalfetcher)\/([\w\.]+)/i,

// Slackbot - https://api.slack.com/robots
/(slack(?:bot)?(?:-imgproxy|-linkexpanding)?) ([\w\.]+)/i,
Expand All @@ -203,7 +235,7 @@ const Fetchers = Object.freeze({
[NAME, VERSION, [TYPE, FETCHER]],

// Google Bots / Snapchat
[/(feedfetcher-google|google-read-aloud|(?=bot; )snapchat)/i],
[/(feedfetcher-google|google(?:-read-aloud|producer)|(?=bot; )snapchat)/i],
[NAME, [TYPE, FETCHER]],
]
});
Expand Down Expand Up @@ -252,8 +284,8 @@ const MediaPlayers = Object.freeze({
/(flrp)\/([\w\.-]+)/i // Flip Player
], [[NAME, 'Flip Player'], VERSION, [TYPE, MEDIAPLAYER]], [

/(fstream|nativehost|queryseekspider|ia-archiver|facebookexternalhit)/i
// FStream/NativeHost/QuerySeekSpider/IA Archiver/facebookexternalhit
/(fstream|nativehost|queryseekspider)/i
// FStream/NativeHost/QuerySeekSpider
], [NAME, [TYPE, MEDIAPLAYER]], [

/(gstreamer) souphttpsrc.+libsoup\/([\w\.-]+)/i
Expand Down
Loading

0 comments on commit a0e11b7

Please sign in to comment.