-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbrowser_bot.py
115 lines (80 loc) · 3.25 KB
/
browser_bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from time import sleep

import requests
from bs4 import BeautifulSoup
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
# Configure Firefox with the 'normal' page-load strategy and start it.
options = Options()
options.page_load_strategy = 'normal'
browser = webdriver.Firefox(options=options)

# Portal address and the credentials typed into its login form further down.
url = 'http://www.portalredevw.com.br/'
login, password = 'test', '1234'

# Open the portal's landing page.
browser.get(url)
# Walk down the portal's nested frames, one level at a time, until the frame
# that holds the login form is the active context.
# The find_element_by_* helpers were removed in Selenium 4.3, so the generic
# find_element(By.XPATH, ...) form is used; it also works on older releases.
# frame_1..frame_4 are kept as names because later statements reuse them.
frame_1 = browser.find_element(By.XPATH, '/html/frameset/frame[1]')
browser.switch_to.frame(frame_1)
frame_2 = browser.find_element(By.XPATH, '//*[@id="divframe"]')
browser.switch_to.frame(frame_2)
frame_3 = browser.find_element(By.XPATH, '//*[@id="main"]')
browser.switch_to.frame(frame_3)
frame_4 = browser.find_element(By.XPATH, '//*[@id="mainApp"]')
browser.switch_to.frame(frame_4)
frame_5 = browser.find_element(By.XPATH, '//*[@id="HeaderMenu"]')  # login and password frame
browser.switch_to.frame(frame_5)

# Fill in the login field; the one-second pauses between steps are kept
# from the original script.
input_login = browser.find_element(By.XPATH, '//*[@id="txtCPFCNPJ"]')
sleep(1)
input_login.send_keys(login)

# Fill in the password field.
input_password = browser.find_element(By.XPATH, '//*[@id="txtSenha"]')
sleep(1)
input_password.send_keys(password)

# Submit the form.
button = browser.find_element(By.XPATH, '//*[@id="btnLogin"]')
button.click()
# Dismiss the alert shown after the login form is submitted.
# The pause now comes before the alert is fetched, so the alert has time to
# appear; the total wait is unchanged.
sleep(1)
alert_obj = browser.switch_to.alert  # move focus to the alert
alert_obj.accept()

# Go back to the top-level document and walk down to the application frame.
# The frames are located again instead of reusing earlier element references,
# which could be stale if the login step reloaded any of them.
browser.switch_to.default_content()
for frame_xpath in (
    '/html/frameset/frame[1]',
    '//*[@id="divframe"]',
    '//*[@id="main"]',
    '//*[@id="mainApp"]',
    '//*[@id="App"]',  # frame that holds the footer link
):
    browser.switch_to.frame(browser.find_element(By.XPATH, frame_xpath))

# Remember the current window so the newly opened one can be told apart.
window_before = browser.current_window_handle

link_volkswagen = browser.find_element(
    By.XPATH,
    '//*[@id="form1"]/table/tbody/tr[2]/td/table/tbody/tr[3]/td/table/tbody/tr/td[2]/a[1]',
)
link_volkswagen.click()

# Wait BEFORE reading the handle list: the original indexed window_handles[1]
# straight after the click, which raises IndexError if the new window has not
# been registered yet.
sleep(10)
new_windows = [handle for handle in browser.window_handles if handle != window_before]
if not new_windows:
    # Close the browser so a failed run does not leave Firefox open.
    browser.quit()
    raise RuntimeError('Clicking the link did not open a new window')

window_after = new_windows[0]
browser.switch_to.window(window_after)  # switch to the new window
# Create the text file and write the page's visible text into it.
body = browser.find_element(By.TAG_NAME, 'body')

# The `with` block closes the file even if the write fails. UTF-8 is set
# explicitly so accented characters do not depend on the platform's default
# encoding. body.text is one string, so write() replaces writelines().
with open('texto_vw_com_br.txt', 'w', encoding='utf-8') as file_text:
    file_text.write(body.text)

sleep(5)
# Save the page as a PDF.
# Approaches the author tried and rejected (translated from the original notes):
#   - pdfkit.from_url(browser.current_url, 'pdf_vw_com_br.pdf') saved without images
#   - browser.execute_script('window.print()') opened the system print dialog
# So the page is captured as a PNG screenshot and then converted to PDF.

# Scroll down in steps so the images load before the capture.
for fraction in (0.2, 0.4, 0.6, 0.8):
    browser.execute_script(
        "window.scrollTo(0, document.body.scrollHeight * arguments[0])",
        fraction,
    )
    sleep(1)
browser.execute_script("window.scrollTo(0,document.body.scrollHeight)")
sleep(3)

# Screenshot the <body> element to a PNG file.
page = browser.find_element(By.TAG_NAME, 'body')
page.screenshot('scrape.png')
sleep(1)

# Convert the PNG to RGB and save it as a PDF. The `with` block releases the
# image file handle afterwards.
with Image.open(r'scrape.png') as print_page:
    pdf = print_page.convert('RGB')
    pdf.save(r'pdf_vw_com_br.pdf')
# Collect every link on the page: fetch the current URL again over plain HTTP,
# parse the HTML and print the href of each <a> element (None when an anchor
# has no href attribute).
try:
    # A timeout keeps the script from hanging forever on an unresponsive server.
    req = requests.get(browser.current_url, timeout=30)
    soup = BeautifulSoup(req.content, 'html.parser')
    for link in soup.find_all('a'):
        print(link.get('href'))
    # NOTE(review): the original ends with a bare "# Pandas" marker here,
    # presumably a planned pandas step; nothing is implemented in the visible source.
finally:
    # Always close the browser, even if the request or parsing fails.
    browser.quit()