diff --git a/.ipynb_checkpoints/dsande30-checkpoint.ipynb b/.ipynb_checkpoints/dsande30-checkpoint.ipynb new file mode 100644 index 0000000..f95be97 --- /dev/null +++ b/.ipynb_checkpoints/dsande30-checkpoint.ipynb @@ -0,0 +1,524 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pprint\n", + "import re\n", + "import pymongo, json\n", + "\n", + "pp = pprint.PrettyPrinter(indent=1,width=65)\n", + "\n", + "client = pymongo.MongoClient(host=\"da1.eecs.utk.edu\")\n", + "db = client['fdac19mp2']\n", + "coll = db['dsande30']\n", + "\n", + "# Dataset 1\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Cracked Passwords', \n", + " 'first dataset': 'List of 142k compromised passwords not to be used', \n", + " 'license': 'CC BY-NC-SA 4.0', \n", + " 'description': 'Here is a list of 142002 compromised user+passwords that you should not use. I collected them myself leaving a honeypot virtual machine to be attacked by crackers and studying their techniques. This list is a compilation of several attacks and the list of password files they left behind', \n", + " 'urls': 'https://www.kaggle.com/avibrazil/compromised-passwords'\n", + " } \n", + ")\n", + "\n", + "# Dataset 2\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Pwned Passwords', \n", + " 'first dataset': 'Pwned Passwords', \n", + " 'license': '', \n", + " 'description': 'Pwned Passwords are 555,278,657 real world passwords previously exposed in data breaches. This exposure makes them unsuitable for ongoing use as theyre at much greater risk of being used to take over other accounts. 
Theyre searchable online below as well as being downloadable for use in other online systems.', \n", + " 'urls': 'https://haveibeenpwned.com/Passwords'\n", + " } \n", + ")\n", + "\n", + "# Dataset 3\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Common Password List ( rockyou.txt )', \n", + " 'first dataset': 'Built-in Kali Linux wordlist rockyou.txt', \n", + " 'license': '', \n", + " 'description': 'Back in 2009, a company named RockYou was hacked. This wouldnt have been too much of a problem if they hadnt stored all of their passwords unencrypted, in plain text for an attacker to see. They downloaded a list of all the passwords and made it publically available.', \n", + " 'urls': 'https://www.kaggle.com/wjburns/common-password-list-rockyoutxt'\n", + " } \n", + ")\n", + "\n", + "# Dataset 4\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Approximately how often, if at all, do you change your wifi network password? (by gender)', \n", + " 'first dataset': 'Approximately how often, if at all, do you change your wifi network password? (by gender)', \n", + " 'license': '', \n", + " 'description': 'This statistic displays the frequency with which adults change their home wifi network password in Great Britain (GB) as of March 2011, by gender. During the survey period, it was found that 2.29 percent of female respondents reported that they changed passwords once a month or more often.', \n", + " 'urls': 'https://www.statista.com/statistics/484629/frequency-with-which-adults-change-their-home-wifi-password-in-great-britain/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 5\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Typical character length of online passwords worldwide as of June 2015', \n", + " 'first dataset': 'Typical character length of online passwords worldwide as of June 2015', \n", + " 'license': '', \n", + " 'description': 'This statistic shows the typical character length of online passwords worldwide as of June 2015. 
During the survey period, 62 percent of respondents used passwords consisting of approximately between 8 and 12 characters.',\n", + " 'urls': 'https://www.statista.com/statistics/463400/character-length-of-internet-passwords/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 6\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Share of internet users in the United States who use the same password for multiple online accounts as of October 2018, by age group', \n", + " 'first dataset': 'Share of internet users in the United States who use the same password for multiple online accounts as of October 2018, by age group', \n", + " 'license': '', \n", + " 'description': 'This statistic shows the share of internet users in the United States who the same passwords across multiple online accounts as of October 2018, sorted by age group. During the survey period, 13 percent of respondents aged 18 to 34 years stated that all of their online accounts had the same password for login.',\n", + " 'urls': 'https://www.statista.com/statistics/676101/us-use-of-similar-online-passwords/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 7\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Could you imagine using a password manager?', \n", + " 'first dataset': 'Could you imagine using a password manager?', \n", + " 'license': '', \n", + " 'description': 'This statistic shows the results of a survey conducted in the United States in 2018 on the willingness to use password manager software. Some 26 percent of respondents state they could imagine using password manager software. 
',\n", + " 'urls': 'https://www.statista.com/forecasts/988280/willingness-to-use-password-manager-software-in-the-us'\n", + " } \n", + ")\n", + "\n", + "# Dataset 8\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Yahoo Password Frequency Corpus', \n", + " 'first dataset': 'Yahoo Password Frequency Corpus', \n", + " 'license': 'CC0', \n", + " 'description': 'This dataset includes sanitized password frequency lists collected from Yahoo in May 2011. ',\n", + " 'urls': 'https://figshare.com/articles/Yahoo_Password_Frequency_Corpus/2057937'\n", + " } \n", + ")\n", + "\n", + "# Dataset 9\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Share of internet users in the United States who use two-factor authentication in 2013 and 2017', \n", + " 'first dataset': 'Share of internet users in the United States who use two-factor authentication in 2013 and 2017', \n", + " 'license': '', \n", + " 'description': 'This statistic shows the share of internet users in the United States who use two-factor authentication in 2013 and 2017. During the most recent survey period, 28 percent of respondents stated that they used two-factor authentication. 
',\n", + " 'urls': 'https://www.statista.com/statistics/789473/us-use-of-two-factor-authentication/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 10\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Phishing website dataset', \n", + " 'first dataset': '', \n", + " 'license': 'CC0: Public Domain', \n", + " 'description': 'This website lists 30 optimized features of phishing website.',\n", + " 'urls': 'https://www.kaggle.com/akashkr/phishing-website-dataset'\n", + " } \n", + ")\n", + "\n", + "# Dataset 11\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'PHISH-IRIS DATASET: A SMALL SCALE MULTI-CLASS PHISHING WEB PAGE SCREENSHOTS DATASET', \n", + " 'first dataset': '', \n", + " 'license': 'http://creativecommons.org/licenses/by/4.0', \n", + " 'description': 'Phish-IRIS dataset is aimed for researchers to supply a ground truth dataset to evaluate their vision based multi-class anti-phishing studies.',\n", + " 'urls': 'https://www.narcis.nl/dataset/RecordID/oai%3Aeasy.dans.knaw.nl%3Aeasy-dataset%3A113150'\n", + " } \n", + ")\n", + "\n", + "# Dataset 12\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'2016 Cybersecurity Report Data Set', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'This set comprises the data collected to inform the 2016 Cybersecurity Report entitled Cybersecurity Are we ready in Latin America and the Caribbean?',\n", + " 'urls': 'https://mydata.iadb.org/Reform-Modernization-of-the-State/2016-Cybersecurity-Report-Data-Set/cd6z-sjjc'\n", + " } \n", + ")\n", + "\n", + "# Dataset 13\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Annual number of data breaches and exposed records in the United States from 2005 to 2018 (in millions)', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'The statistic presents the recorded number of data breaches and records exposed in the United States between 2005 and 2018. 
',\n", + " 'urls': 'https://www.statista.com/statistics/273550/data-breaches-recorded-in-the-united-states-by-number-of-breaches-and-records-exposed/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 14\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Share of enterprises who were victims cyberattacks on Internet of Things (IoT) systems in Italy in 2017', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'This statistic displays the share of enterprises who were victims of cyberattacks to Internet of Things systems in Italy in 2017',\n", + " 'urls': 'https://www.statista.com/statistics/620040/cybersecurity-cyberattack-to-iot-systems-in-italy/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 15\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'India Information Technology Statistics: Cyber Security Incidents', \n", + " 'first dataset': 'India Cyber Security Incidents: Network Scanning or Probing', \n", + " 'license': '', \n", + " 'description': 'India’s Cyber Security Incidents: Network Scanning or Probing data was reported at 9,383.000 Unit in Dec 2017',\n", + " 'urls': 'https://www.ceicdata.com/en/india/information-technology-statistics-cyber-security-incidents'\n", + " } \n", + ")\n", + "\n", + "# Dataset 16\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'A BOTNET DETECTION TECHNIQUES', \n", + " 'first dataset': '', \n", + " 'license': 'CC BY 4.0', \n", + " 'description': 'Botnets are emerging as the most serious threat against cyber-security as they provide a distributed platform for several illegal activities such as launching distributed denial of service attacksagainst critical targets, malware dissemination, phishing, and click fraud.',\n", + " 'urls': 'https://figshare.com/articles/A_BOTNET_DETECTION_TECHNIQUES/1054316'\n", + " } \n", + ")\n", + "\n", + "# Dataset 17\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Botnet Traffic Dataset', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'A 
Labeled Dataset with Botnet, Normal and Background traffic',\n", + " 'urls': 'https://www.stratosphereips.org/datasets-ctu13/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 18\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'ddosflowgenRecord', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'ddosflowgen is a tool that models a DDoS attack and generates synthetic traffic datasets from multiple views. You can define the number of attacking networks and adjust parameters such as the attack vectors present, the amplification factor, and the number of attack sources per network.',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=791'\n", + " } \n", + ")\n", + "\n", + "# Dataset 19\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Android Botnet dataset', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'This dataset is a comprehensive evaluation of Android botnets, it gathered a large collection of Android botnet samples representing 14 botnet families.',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=932'\n", + " } \n", + ")\n", + "\n", + "# Dataset 20\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Mirai Scanning 2016', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'Network scanning attributed to the Mirai worm. The worm infects IoT devices and tries to propagate by scanning for insecure Telnet channels.',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=717'\n", + " } \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'_id': ObjectId('5d8e1b051d80d3b735d728b4'),\n", + " 'description': 'Here is a list of 142002 compromised '\n", + " 'user+passwords that you should not use. 
I '\n", + " 'collected them myself leaving a honeypot '\n", + " 'virtual machine to be attacked by crackers and '\n", + " 'studying their techniques. This list is a '\n", + " 'compilation of several attacks and the list of '\n", + " 'password files they left behind',\n", + " 'first dataset': 'List of 142k compromised passwords not to be '\n", + " 'used',\n", + " 'license': 'CC BY-NC-SA 4.0',\n", + " 'topic': 'Cracked Passwords',\n", + " 'urls': 'https://www.kaggle.com/avibrazil/compromised-passwords'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728b5'),\n", + " 'description': 'Pwned Passwords are 555,278,657 real world '\n", + " 'passwords previously exposed in data breaches. '\n", + " 'This exposure makes them unsuitable for '\n", + " 'ongoing use as theyre at much greater risk of '\n", + " 'being used to take over other accounts. Theyre '\n", + " 'searchable online below as well as being '\n", + " 'downloadable for use in other online systems.',\n", + " 'first dataset': 'Pwned Passwords',\n", + " 'license': '',\n", + " 'topic': 'Pwned Passwords',\n", + " 'urls': 'https://haveibeenpwned.com/Passwords'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728b6'),\n", + " 'description': 'Back in 2009, a company named RockYou was '\n", + " 'hacked. This wouldnt have been too much of a '\n", + " 'problem if they hadnt stored all of their '\n", + " 'passwords unencrypted, in plain text for an '\n", + " 'attacker to see. 
They downloaded a list of all '\n", + " 'the passwords and made it publically '\n", + " 'available.',\n", + " 'first dataset': 'Built-in Kali Linux wordlist rockyou.txt',\n", + " 'license': '',\n", + " 'topic': 'Common Password List ( rockyou.txt )',\n", + " 'urls': 'https://www.kaggle.com/wjburns/common-password-list-rockyoutxt'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728b7'),\n", + " 'description': 'This statistic displays the frequency with '\n", + " 'which adults change their home wifi network '\n", + " 'password in Great Britain (GB) as of March '\n", + " '2011, by gender. During the survey period, it '\n", + " 'was found that 2.29 percent of female '\n", + " 'respondents reported that they changed '\n", + " 'passwords once a month or more often.',\n", + " 'first dataset': 'Approximately how often, if at all, do you '\n", + " 'change your wifi network password? (by '\n", + " 'gender)',\n", + " 'license': '',\n", + " 'topic': 'Approximately how often, if at all, do you change '\n", + " 'your wifi network password? (by gender)',\n", + " 'urls': 'https://www.statista.com/statistics/484629/frequency-with-which-adults-change-their-home-wifi-password-in-great-britain/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728b8'),\n", + " 'description': 'This statistic shows the typical character '\n", + " 'length of online passwords worldwide as of '\n", + " 'June 2015. 
During the survey period, 62 '\n", + " 'percent of respondents used passwords '\n", + " 'consisting of approximately between 8 and 12 '\n", + " 'characters.',\n", + " 'first dataset': 'Typical character length of online passwords '\n", + " 'worldwide as of June 2015',\n", + " 'license': '',\n", + " 'topic': 'Typical character length of online passwords '\n", + " 'worldwide as of June 2015',\n", + " 'urls': 'https://www.statista.com/statistics/463400/character-length-of-internet-passwords/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728b9'),\n", + " 'description': 'This statistic shows the share of internet '\n", + " 'users in the United States who the same '\n", + " 'passwords across multiple online accounts as '\n", + " 'of October 2018, sorted by age group. During '\n", + " 'the survey period, 13 percent of respondents '\n", + " 'aged 18 to 34 years stated that all of their '\n", + " 'online accounts had the same password for '\n", + " 'login.',\n", + " 'first dataset': 'Share of internet users in the United States '\n", + " 'who use the same password for multiple '\n", + " 'online accounts as of October 2018, by age '\n", + " 'group',\n", + " 'license': '',\n", + " 'topic': 'Share of internet users in the United States who use '\n", + " 'the same password for multiple online accounts as of '\n", + " 'October 2018, by age group',\n", + " 'urls': 'https://www.statista.com/statistics/676101/us-use-of-similar-online-passwords/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728ba'),\n", + " 'description': 'This statistic shows the results of a survey '\n", + " 'conducted in the United States in 2018 on the '\n", + " 'willingness to use password manager software. '\n", + " 'Some 26 percent of respondents state they '\n", + " 'could imagine using password manager '\n", + " 'software. 
',\n", + " 'first dataset': 'Could you imagine using a password manager?',\n", + " 'license': '',\n", + " 'topic': 'Could you imagine using a password manager?',\n", + " 'urls': 'https://www.statista.com/forecasts/988280/willingness-to-use-password-manager-software-in-the-us'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728bb'),\n", + " 'description': 'This dataset includes sanitized password '\n", + " 'frequency lists collected from Yahoo in May '\n", + " '2011. ',\n", + " 'first dataset': 'Yahoo Password Frequency Corpus',\n", + " 'license': 'CC0',\n", + " 'topic': 'Yahoo Password Frequency Corpus',\n", + " 'urls': 'https://figshare.com/articles/Yahoo_Password_Frequency_Corpus/2057937'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728bc'),\n", + " 'description': 'This statistic shows the share of internet '\n", + " 'users in the United States who use two-factor '\n", + " 'authentication in 2013 and 2017. During the '\n", + " 'most recent survey period, 28 percent of '\n", + " 'respondents stated that they used two-factor '\n", + " 'authentication. 
',\n", + " 'first dataset': 'Share of internet users in the United States '\n", + " 'who use two-factor authentication in 2013 '\n", + " 'and 2017',\n", + " 'license': '',\n", + " 'topic': 'Share of internet users in the United States who use '\n", + " 'two-factor authentication in 2013 and 2017',\n", + " 'urls': 'https://www.statista.com/statistics/789473/us-use-of-two-factor-authentication/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728bd'),\n", + " 'description': 'This website lists 30 optimized features of '\n", + " 'phishing website.',\n", + " 'first dataset': '',\n", + " 'license': 'CC0: Public Domain',\n", + " 'topic': 'Phishing website dataset',\n", + " 'urls': 'https://www.kaggle.com/akashkr/phishing-website-dataset'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728be'),\n", + " 'description': 'Phish-IRIS dataset is aimed for researchers to '\n", + " 'supply a ground truth dataset to evaluate '\n", + " 'their vision based multi-class anti-phishing '\n", + " 'studies.',\n", + " 'first dataset': '',\n", + " 'license': 'http://creativecommons.org/licenses/by/4.0',\n", + " 'topic': 'PHISH-IRIS DATASET: A SMALL SCALE MULTI-CLASS '\n", + " 'PHISHING WEB PAGE SCREENSHOTS DATASET',\n", + " 'urls': 'https://www.narcis.nl/dataset/RecordID/oai%3Aeasy.dans.knaw.nl%3Aeasy-dataset%3A113150'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728bf'),\n", + " 'description': 'This set comprises the data collected to '\n", + " 'inform the 2016 Cybersecurity Report entitled '\n", + " 'Cybersecurity Are we ready in Latin America '\n", + " 'and the Caribbean?',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': '2016 Cybersecurity Report Data Set',\n", + " 'urls': 'https://mydata.iadb.org/Reform-Modernization-of-the-State/2016-Cybersecurity-Report-Data-Set/cd6z-sjjc'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c0'),\n", + " 'description': 'The statistic presents the recorded number of '\n", + " 'data breaches and records exposed in the '\n", + " 'United 
States between 2005 and 2018. ',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'Annual number of data breaches and exposed records '\n", + " 'in the United States from 2005 to 2018 (in millions)',\n", + " 'urls': 'https://www.statista.com/statistics/273550/data-breaches-recorded-in-the-united-states-by-number-of-breaches-and-records-exposed/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c1'),\n", + " 'description': 'This statistic displays the share of '\n", + " 'enterprises who were victims of cyberattacks '\n", + " 'to Internet of Things systems in Italy in 2017',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'Share of enterprises who were victims cyberattacks '\n", + " 'on Internet of Things (IoT) systems in Italy in 2017',\n", + " 'urls': 'https://www.statista.com/statistics/620040/cybersecurity-cyberattack-to-iot-systems-in-italy/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c2'),\n", + " 'description': 'India’s Cyber Security Incidents: Network '\n", + " 'Scanning or Probing data was reported at '\n", + " '9,383.000 Unit in Dec 2017',\n", + " 'first dataset': 'India Cyber Security Incidents: Network '\n", + " 'Scanning or Probing',\n", + " 'license': '',\n", + " 'topic': 'India Information Technology Statistics: Cyber '\n", + " 'Security Incidents',\n", + " 'urls': 'https://www.ceicdata.com/en/india/information-technology-statistics-cyber-security-incidents'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c3'),\n", + " 'description': 'Botnets are emerging as the most serious '\n", + " 'threat against cyber-security as they provide '\n", + " 'a distributed platform for several illegal '\n", + " 'activities such as launching distributed '\n", + " 'denial of service attacksagainst critical '\n", + " 'targets, malware dissemination, phishing, and '\n", + " 'click fraud.',\n", + " 'first dataset': '',\n", + " 'license': 'CC BY 4.0',\n", + " 'topic': 'A BOTNET DETECTION TECHNIQUES',\n", + " 'urls': 
'https://figshare.com/articles/A_BOTNET_DETECTION_TECHNIQUES/1054316'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c4'),\n", + " 'description': 'A Labeled Dataset with Botnet, Normal and '\n", + " 'Background traffic',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'Botnet Traffic Dataset',\n", + " 'urls': 'https://www.stratosphereips.org/datasets-ctu13/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c5'),\n", + " 'description': 'ddosflowgen is a tool that models a DDoS '\n", + " 'attack and generates synthetic traffic '\n", + " 'datasets from multiple views. You can define '\n", + " 'the number of attacking networks and adjust '\n", + " 'parameters such as the attack vectors present, '\n", + " 'the amplification factor, and the number of '\n", + " 'attack sources per network.',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'ddosflowgenRecord',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=791'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c6'),\n", + " 'description': 'This dataset is a comprehensive evaluation of '\n", + " 'Android botnets, it gathered a large '\n", + " 'collection of Android botnet samples '\n", + " 'representing 14 botnet families.',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'Android Botnet dataset',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=932'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c7'),\n", + " 'description': 'Network scanning attributed to the Mirai worm. 
'\n", + " 'The worm infects IoT devices and tries to '\n", + " 'propagate by scanning for insecure Telnet '\n", + " 'channels.',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'Mirai Scanning 2016',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=717'}\n" + ] + } + ], + "source": [ + "import pprint\n", + "import pymongo, json\n", + "client = pymongo.MongoClient (host=\"da1.eecs.utk.edu\")\n", + "db = client ['fdac19mp2']\n", + "coll = db ['dsande30']\n", + "pp = pprint.PrettyPrinter(indent=1,width=65)\n", + "for r in coll. find():\n", + " print(pp .pformat (r)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/dsande30.ipynb b/dsande30.ipynb new file mode 100644 index 0000000..f95be97 --- /dev/null +++ b/dsande30.ipynb @@ -0,0 +1,524 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pprint\n", + "import re\n", + "import pymongo, json\n", + "\n", + "pp = pprint.PrettyPrinter(indent=1,width=65)\n", + "\n", + "client = pymongo.MongoClient(host=\"da1.eecs.utk.edu\")\n", + "db = client['fdac19mp2']\n", + "coll = db['dsande30']\n", + "\n", + "# Dataset 1\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Cracked Passwords', \n", + " 'first dataset': 'List of 142k compromised passwords not to be used', 
\n", + " 'license': 'CC BY-NC-SA 4.0', \n", + " 'description': 'Here is a list of 142002 compromised user+passwords that you should not use. I collected them myself leaving a honeypot virtual machine to be attacked by crackers and studying their techniques. This list is a compilation of several attacks and the list of password files they left behind', \n", + " 'urls': 'https://www.kaggle.com/avibrazil/compromised-passwords'\n", + " } \n", + ")\n", + "\n", + "# Dataset 2\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Pwned Passwords', \n", + " 'first dataset': 'Pwned Passwords', \n", + " 'license': '', \n", + " 'description': 'Pwned Passwords are 555,278,657 real world passwords previously exposed in data breaches. This exposure makes them unsuitable for ongoing use as theyre at much greater risk of being used to take over other accounts. Theyre searchable online below as well as being downloadable for use in other online systems.', \n", + " 'urls': 'https://haveibeenpwned.com/Passwords'\n", + " } \n", + ")\n", + "\n", + "# Dataset 3\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Common Password List ( rockyou.txt )', \n", + " 'first dataset': 'Built-in Kali Linux wordlist rockyou.txt', \n", + " 'license': '', \n", + " 'description': 'Back in 2009, a company named RockYou was hacked. This wouldnt have been too much of a problem if they hadnt stored all of their passwords unencrypted, in plain text for an attacker to see. They downloaded a list of all the passwords and made it publically available.', \n", + " 'urls': 'https://www.kaggle.com/wjburns/common-password-list-rockyoutxt'\n", + " } \n", + ")\n", + "\n", + "# Dataset 4\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Approximately how often, if at all, do you change your wifi network password? (by gender)', \n", + " 'first dataset': 'Approximately how often, if at all, do you change your wifi network password? 
(by gender)', \n", + " 'license': '', \n", + " 'description': 'This statistic displays the frequency with which adults change their home wifi network password in Great Britain (GB) as of March 2011, by gender. During the survey period, it was found that 2.29 percent of female respondents reported that they changed passwords once a month or more often.', \n", + " 'urls': 'https://www.statista.com/statistics/484629/frequency-with-which-adults-change-their-home-wifi-password-in-great-britain/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 5\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Typical character length of online passwords worldwide as of June 2015', \n", + " 'first dataset': 'Typical character length of online passwords worldwide as of June 2015', \n", + " 'license': '', \n", + " 'description': 'This statistic shows the typical character length of online passwords worldwide as of June 2015. During the survey period, 62 percent of respondents used passwords consisting of approximately between 8 and 12 characters.',\n", + " 'urls': 'https://www.statista.com/statistics/463400/character-length-of-internet-passwords/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 6\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Share of internet users in the United States who use the same password for multiple online accounts as of October 2018, by age group', \n", + " 'first dataset': 'Share of internet users in the United States who use the same password for multiple online accounts as of October 2018, by age group', \n", + " 'license': '', \n", + " 'description': 'This statistic shows the share of internet users in the United States who the same passwords across multiple online accounts as of October 2018, sorted by age group. 
During the survey period, 13 percent of respondents aged 18 to 34 years stated that all of their online accounts had the same password for login.',\n", + " 'urls': 'https://www.statista.com/statistics/676101/us-use-of-similar-online-passwords/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 7\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Could you imagine using a password manager?', \n", + " 'first dataset': 'Could you imagine using a password manager?', \n", + " 'license': '', \n", + " 'description': 'This statistic shows the results of a survey conducted in the United States in 2018 on the willingness to use password manager software. Some 26 percent of respondents state they could imagine using password manager software. ',\n", + " 'urls': 'https://www.statista.com/forecasts/988280/willingness-to-use-password-manager-software-in-the-us'\n", + " } \n", + ")\n", + "\n", + "# Dataset 8\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Yahoo Password Frequency Corpus', \n", + " 'first dataset': 'Yahoo Password Frequency Corpus', \n", + " 'license': 'CC0', \n", + " 'description': 'This dataset includes sanitized password frequency lists collected from Yahoo in May 2011. ',\n", + " 'urls': 'https://figshare.com/articles/Yahoo_Password_Frequency_Corpus/2057937'\n", + " } \n", + ")\n", + "\n", + "# Dataset 9\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Share of internet users in the United States who use two-factor authentication in 2013 and 2017', \n", + " 'first dataset': 'Share of internet users in the United States who use two-factor authentication in 2013 and 2017', \n", + " 'license': '', \n", + " 'description': 'This statistic shows the share of internet users in the United States who use two-factor authentication in 2013 and 2017. During the most recent survey period, 28 percent of respondents stated that they used two-factor authentication. 
',\n", + " 'urls': 'https://www.statista.com/statistics/789473/us-use-of-two-factor-authentication/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 10\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Phishing website dataset', \n", + " 'first dataset': '', \n", + " 'license': 'CC0: Public Domain', \n", + " 'description': 'This website lists 30 optimized features of phishing website.',\n", + " 'urls': 'https://www.kaggle.com/akashkr/phishing-website-dataset'\n", + " } \n", + ")\n", + "\n", + "# Dataset 11\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'PHISH-IRIS DATASET: A SMALL SCALE MULTI-CLASS PHISHING WEB PAGE SCREENSHOTS DATASET', \n", + " 'first dataset': '', \n", + " 'license': 'http://creativecommons.org/licenses/by/4.0', \n", + " 'description': 'Phish-IRIS dataset is aimed for researchers to supply a ground truth dataset to evaluate their vision based multi-class anti-phishing studies.',\n", + " 'urls': 'https://www.narcis.nl/dataset/RecordID/oai%3Aeasy.dans.knaw.nl%3Aeasy-dataset%3A113150'\n", + " } \n", + ")\n", + "\n", + "# Dataset 12\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'2016 Cybersecurity Report Data Set', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'This set comprises the data collected to inform the 2016 Cybersecurity Report entitled Cybersecurity Are we ready in Latin America and the Caribbean?',\n", + " 'urls': 'https://mydata.iadb.org/Reform-Modernization-of-the-State/2016-Cybersecurity-Report-Data-Set/cd6z-sjjc'\n", + " } \n", + ")\n", + "\n", + "# Dataset 13\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Annual number of data breaches and exposed records in the United States from 2005 to 2018 (in millions)', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'The statistic presents the recorded number of data breaches and records exposed in the United States between 2005 and 2018. 
',\n", + " 'urls': 'https://www.statista.com/statistics/273550/data-breaches-recorded-in-the-united-states-by-number-of-breaches-and-records-exposed/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 14\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Share of enterprises who were victims cyberattacks on Internet of Things (IoT) systems in Italy in 2017', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'This statistic displays the share of enterprises who were victims of cyberattacks to Internet of Things systems in Italy in 2017',\n", + " 'urls': 'https://www.statista.com/statistics/620040/cybersecurity-cyberattack-to-iot-systems-in-italy/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 15\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'India Information Technology Statistics: Cyber Security Incidents', \n", + " 'first dataset': 'India Cyber Security Incidents: Network Scanning or Probing', \n", + " 'license': '', \n", + " 'description': 'India’s Cyber Security Incidents: Network Scanning or Probing data was reported at 9,383.000 Unit in Dec 2017',\n", + " 'urls': 'https://www.ceicdata.com/en/india/information-technology-statistics-cyber-security-incidents'\n", + " } \n", + ")\n", + "\n", + "# Dataset 16\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'A BOTNET DETECTION TECHNIQUES', \n", + " 'first dataset': '', \n", + " 'license': 'CC BY 4.0', \n", + " 'description': 'Botnets are emerging as the most serious threat against cyber-security as they provide a distributed platform for several illegal activities such as launching distributed denial of service attacksagainst critical targets, malware dissemination, phishing, and click fraud.',\n", + " 'urls': 'https://figshare.com/articles/A_BOTNET_DETECTION_TECHNIQUES/1054316'\n", + " } \n", + ")\n", + "\n", + "# Dataset 17\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Botnet Traffic Dataset', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'A 
Labeled Dataset with Botnet, Normal and Background traffic',\n", + " 'urls': 'https://www.stratosphereips.org/datasets-ctu13/'\n", + " } \n", + ")\n", + "\n", + "# Dataset 18\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'ddosflowgenRecord', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'ddosflowgen is a tool that models a DDoS attack and generates synthetic traffic datasets from multiple views. You can define the number of attacking networks and adjust parameters such as the attack vectors present, the amplification factor, and the number of attack sources per network.',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=791'\n", + " } \n", + ")\n", + "\n", + "# Dataset 19\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Android Botnet dataset', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'This dataset is a comprehensive evaluation of Android botnets, it gathered a large collection of Android botnet samples representing 14 botnet families.',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=932'\n", + " } \n", + ")\n", + "\n", + "# Dataset 20\n", + "coll.insert_one( \n", + " { \n", + " 'topic':'Mirai Scanning 2016', \n", + " 'first dataset': '', \n", + " 'license': '', \n", + " 'description': 'Network scanning attributed to the Mirai worm. The worm infects IoT devices and tries to propagate by scanning for insecure Telnet channels.',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=717'\n", + " } \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'_id': ObjectId('5d8e1b051d80d3b735d728b4'),\n", + " 'description': 'Here is a list of 142002 compromised '\n", + " 'user+passwords that you should not use. 
I '\n", + " 'collected them myself leaving a honeypot '\n", + " 'virtual machine to be attacked by crackers and '\n", + " 'studying their techniques. This list is a '\n", + " 'compilation of several attacks and the list of '\n", + " 'password files they left behind',\n", + " 'first dataset': 'List of 142k compromised passwords not to be '\n", + " 'used',\n", + " 'license': 'CC BY-NC-SA 4.0',\n", + " 'topic': 'Cracked Passwords',\n", + " 'urls': 'https://www.kaggle.com/avibrazil/compromised-passwords'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728b5'),\n", + " 'description': 'Pwned Passwords are 555,278,657 real world '\n", + " 'passwords previously exposed in data breaches. '\n", + " 'This exposure makes them unsuitable for '\n", + " 'ongoing use as theyre at much greater risk of '\n", + " 'being used to take over other accounts. Theyre '\n", + " 'searchable online below as well as being '\n", + " 'downloadable for use in other online systems.',\n", + " 'first dataset': 'Pwned Passwords',\n", + " 'license': '',\n", + " 'topic': 'Pwned Passwords',\n", + " 'urls': 'https://haveibeenpwned.com/Passwords'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728b6'),\n", + " 'description': 'Back in 2009, a company named RockYou was '\n", + " 'hacked. This wouldnt have been too much of a '\n", + " 'problem if they hadnt stored all of their '\n", + " 'passwords unencrypted, in plain text for an '\n", + " 'attacker to see. 
They downloaded a list of all '\n", + " 'the passwords and made it publically '\n", + " 'available.',\n", + " 'first dataset': 'Built-in Kali Linux wordlist rockyou.txt',\n", + " 'license': '',\n", + " 'topic': 'Common Password List ( rockyou.txt )',\n", + " 'urls': 'https://www.kaggle.com/wjburns/common-password-list-rockyoutxt'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728b7'),\n", + " 'description': 'This statistic displays the frequency with '\n", + " 'which adults change their home wifi network '\n", + " 'password in Great Britain (GB) as of March '\n", + " '2011, by gender. During the survey period, it '\n", + " 'was found that 2.29 percent of female '\n", + " 'respondents reported that they changed '\n", + " 'passwords once a month or more often.',\n", + " 'first dataset': 'Approximately how often, if at all, do you '\n", + " 'change your wifi network password? (by '\n", + " 'gender)',\n", + " 'license': '',\n", + " 'topic': 'Approximately how often, if at all, do you change '\n", + " 'your wifi network password? (by gender)',\n", + " 'urls': 'https://www.statista.com/statistics/484629/frequency-with-which-adults-change-their-home-wifi-password-in-great-britain/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728b8'),\n", + " 'description': 'This statistic shows the typical character '\n", + " 'length of online passwords worldwide as of '\n", + " 'June 2015. 
During the survey period, 62 '\n", + " 'percent of respondents used passwords '\n", + " 'consisting of approximately between 8 and 12 '\n", + " 'characters.',\n", + " 'first dataset': 'Typical character length of online passwords '\n", + " 'worldwide as of June 2015',\n", + " 'license': '',\n", + " 'topic': 'Typical character length of online passwords '\n", + " 'worldwide as of June 2015',\n", + " 'urls': 'https://www.statista.com/statistics/463400/character-length-of-internet-passwords/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728b9'),\n", + " 'description': 'This statistic shows the share of internet '\n", + " 'users in the United States who the same '\n", + " 'passwords across multiple online accounts as '\n", + " 'of October 2018, sorted by age group. During '\n", + " 'the survey period, 13 percent of respondents '\n", + " 'aged 18 to 34 years stated that all of their '\n", + " 'online accounts had the same password for '\n", + " 'login.',\n", + " 'first dataset': 'Share of internet users in the United States '\n", + " 'who use the same password for multiple '\n", + " 'online accounts as of October 2018, by age '\n", + " 'group',\n", + " 'license': '',\n", + " 'topic': 'Share of internet users in the United States who use '\n", + " 'the same password for multiple online accounts as of '\n", + " 'October 2018, by age group',\n", + " 'urls': 'https://www.statista.com/statistics/676101/us-use-of-similar-online-passwords/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728ba'),\n", + " 'description': 'This statistic shows the results of a survey '\n", + " 'conducted in the United States in 2018 on the '\n", + " 'willingness to use password manager software. '\n", + " 'Some 26 percent of respondents state they '\n", + " 'could imagine using password manager '\n", + " 'software. 
',\n", + " 'first dataset': 'Could you imagine using a password manager?',\n", + " 'license': '',\n", + " 'topic': 'Could you imagine using a password manager?',\n", + " 'urls': 'https://www.statista.com/forecasts/988280/willingness-to-use-password-manager-software-in-the-us'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728bb'),\n", + " 'description': 'This dataset includes sanitized password '\n", + " 'frequency lists collected from Yahoo in May '\n", + " '2011. ',\n", + " 'first dataset': 'Yahoo Password Frequency Corpus',\n", + " 'license': 'CC0',\n", + " 'topic': 'Yahoo Password Frequency Corpus',\n", + " 'urls': 'https://figshare.com/articles/Yahoo_Password_Frequency_Corpus/2057937'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728bc'),\n", + " 'description': 'This statistic shows the share of internet '\n", + " 'users in the United States who use two-factor '\n", + " 'authentication in 2013 and 2017. During the '\n", + " 'most recent survey period, 28 percent of '\n", + " 'respondents stated that they used two-factor '\n", + " 'authentication. 
',\n", + " 'first dataset': 'Share of internet users in the United States '\n", + " 'who use two-factor authentication in 2013 '\n", + " 'and 2017',\n", + " 'license': '',\n", + " 'topic': 'Share of internet users in the United States who use '\n", + " 'two-factor authentication in 2013 and 2017',\n", + " 'urls': 'https://www.statista.com/statistics/789473/us-use-of-two-factor-authentication/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728bd'),\n", + " 'description': 'This website lists 30 optimized features of '\n", + " 'phishing website.',\n", + " 'first dataset': '',\n", + " 'license': 'CC0: Public Domain',\n", + " 'topic': 'Phishing website dataset',\n", + " 'urls': 'https://www.kaggle.com/akashkr/phishing-website-dataset'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728be'),\n", + " 'description': 'Phish-IRIS dataset is aimed for researchers to '\n", + " 'supply a ground truth dataset to evaluate '\n", + " 'their vision based multi-class anti-phishing '\n", + " 'studies.',\n", + " 'first dataset': '',\n", + " 'license': 'http://creativecommons.org/licenses/by/4.0',\n", + " 'topic': 'PHISH-IRIS DATASET: A SMALL SCALE MULTI-CLASS '\n", + " 'PHISHING WEB PAGE SCREENSHOTS DATASET',\n", + " 'urls': 'https://www.narcis.nl/dataset/RecordID/oai%3Aeasy.dans.knaw.nl%3Aeasy-dataset%3A113150'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728bf'),\n", + " 'description': 'This set comprises the data collected to '\n", + " 'inform the 2016 Cybersecurity Report entitled '\n", + " 'Cybersecurity Are we ready in Latin America '\n", + " 'and the Caribbean?',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': '2016 Cybersecurity Report Data Set',\n", + " 'urls': 'https://mydata.iadb.org/Reform-Modernization-of-the-State/2016-Cybersecurity-Report-Data-Set/cd6z-sjjc'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c0'),\n", + " 'description': 'The statistic presents the recorded number of '\n", + " 'data breaches and records exposed in the '\n", + " 'United 
States between 2005 and 2018. ',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'Annual number of data breaches and exposed records '\n", + " 'in the United States from 2005 to 2018 (in millions)',\n", + " 'urls': 'https://www.statista.com/statistics/273550/data-breaches-recorded-in-the-united-states-by-number-of-breaches-and-records-exposed/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c1'),\n", + " 'description': 'This statistic displays the share of '\n", + " 'enterprises who were victims of cyberattacks '\n", + " 'to Internet of Things systems in Italy in 2017',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'Share of enterprises who were victims cyberattacks '\n", + " 'on Internet of Things (IoT) systems in Italy in 2017',\n", + " 'urls': 'https://www.statista.com/statistics/620040/cybersecurity-cyberattack-to-iot-systems-in-italy/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c2'),\n", + " 'description': 'India’s Cyber Security Incidents: Network '\n", + " 'Scanning or Probing data was reported at '\n", + " '9,383.000 Unit in Dec 2017',\n", + " 'first dataset': 'India Cyber Security Incidents: Network '\n", + " 'Scanning or Probing',\n", + " 'license': '',\n", + " 'topic': 'India Information Technology Statistics: Cyber '\n", + " 'Security Incidents',\n", + " 'urls': 'https://www.ceicdata.com/en/india/information-technology-statistics-cyber-security-incidents'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c3'),\n", + " 'description': 'Botnets are emerging as the most serious '\n", + " 'threat against cyber-security as they provide '\n", + " 'a distributed platform for several illegal '\n", + " 'activities such as launching distributed '\n", + " 'denial of service attacksagainst critical '\n", + " 'targets, malware dissemination, phishing, and '\n", + " 'click fraud.',\n", + " 'first dataset': '',\n", + " 'license': 'CC BY 4.0',\n", + " 'topic': 'A BOTNET DETECTION TECHNIQUES',\n", + " 'urls': 
'https://figshare.com/articles/A_BOTNET_DETECTION_TECHNIQUES/1054316'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c4'),\n", + " 'description': 'A Labeled Dataset with Botnet, Normal and '\n", + " 'Background traffic',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'Botnet Traffic Dataset',\n", + " 'urls': 'https://www.stratosphereips.org/datasets-ctu13/'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c5'),\n", + " 'description': 'ddosflowgen is a tool that models a DDoS '\n", + " 'attack and generates synthetic traffic '\n", + " 'datasets from multiple views. You can define '\n", + " 'the number of attacking networks and adjust '\n", + " 'parameters such as the attack vectors present, '\n", + " 'the amplification factor, and the number of '\n", + " 'attack sources per network.',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'ddosflowgenRecord',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=791'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c6'),\n", + " 'description': 'This dataset is a comprehensive evaluation of '\n", + " 'Android botnets, it gathered a large '\n", + " 'collection of Android botnet samples '\n", + " 'representing 14 botnet families.',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'Android Botnet dataset',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=932'}\n", + "{'_id': ObjectId('5d8e1b061d80d3b735d728c7'),\n", + " 'description': 'Network scanning attributed to the Mirai worm. 
'\n", + " 'The worm infects IoT devices and tries to '\n", + " 'propagate by scanning for insecure Telnet '\n", + " 'channels.',\n", + " 'first dataset': '',\n", + " 'license': '',\n", + " 'topic': 'Mirai Scanning 2016',\n", + " 'urls': 'https://www.impactcybertrust.org/dataset_view?idDataset=717'}\n" + ] + } + ], + "source": [ + "import json, pprint  # standard library\n", + "import pymongo  # MongoDB driver\n", + "pp = pprint.PrettyPrinter(indent=1, width=65)  # target width of 65 columns for printed documents\n", + "client = pymongo.MongoClient(host=\"da1.eecs.utk.edu\")\n", + "db = client['fdac19mp2']\n", + "coll = db['dsande30']\n", + "for record in coll.find():  # find() with no filter iterates over every document in the collection\n", + "    print(pp.pformat(record))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}