Skip to content

Commit 6613b13

Browse files
committed
updated email crawler
1 parent 780cad2 commit 6613b13

File tree

1 file changed

+9
-14
lines changed

1 file changed

+9
-14
lines changed

08_basic_email_web_crawler.py

+9-14
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,21 @@
11
import requests
22
import re
33

4-
#get url
5-
#url=input('Enter a URL (include 'http://'):')--this is wrong
4+
# get url
65
url = input('Enter a URL (include `http://`): ')
76

7+
# connect to the url
8+
website = requests.get(url)
89

9-
#connect to the url
10-
website=requests.get(url)
10+
# read html
11+
html = website.text
1112

12-
#read html
13-
html=website.text
14-
15-
16-
#use re.findall to grab all the links
13+
# use re.findall to grab all the links
1714
links = re.findall('"((http|ftp)s?://.*?)"', html)
15+
emails = re.findall(r'([\w\.,]+@[\w\.,]+\.\w+)', html)  # raw string: \w and \. reach the regex engine without invalid-escape warnings
1816

19-
emails=re.findall('([\w\.,]+@[\w\.,]+\.\w+)',html)
2017

21-
22-
#prints the number of links in the list
18+
# print the number of links in the list
2319
print("\nFound {} links".format(len(links)))
24-
2520
for email in emails:
26-
print(email)
21+
print(email)

0 commit comments

Comments
 (0)