|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +# @Time : 2020/7/24 18:33 |
| 4 | +# @Author : 彦小明IT |
| 5 | +# @File : ticketSelect.py |
| 6 | +# @Software: PyCharm |
| 7 | +# @description: 实现火车票查询 输入:重庆 成都 2020-08-03 |
| 8 | + # 爬虫项目,requests(不要),selenium的webdriver |
| 9 | + # 1.入口页面 |
| 10 | + # 2.信息如何填充 |
| 11 | + # 3.获取信息, 如何展示 |
| 12 | + |
| 13 | +from selenium import webdriver |
| 14 | +import time |
| 15 | +from lxml import etree |
| 16 | +from prettytable import PrettyTable |
| 17 | +from color import Colored |
| 18 | + |
| 19 | + |
# Return the text of a table cell, preferring its nested <div> when one exists
def getSeatInfo(target):
    """Return the text of ``target`` as a one-element list.

    If ``target`` has a direct ``<div>`` child, the text is read from the
    first such child; otherwise it is read from ``target`` itself.

    The result always contains exactly one string: multiple text nodes are
    concatenated and a cell without text yields ``['']``. Callers build
    fixed-width table rows by concatenating these lists, so returning zero
    or several items would change the row length.

    :param target: an object exposing ``xpath(expr)`` (an lxml element in
        this script).
    :return: ``[text]`` -- a list holding a single string.
    """
    nested = target.xpath('./div')
    source = nested[0] if nested else target
    return [''.join(source.xpath('./text()'))]
| 27 | + |
| 28 | + |
# Fetch ticket information
def _pick_station(driver, input_id, station):
    """Type ``station`` into the input with id ``input_id`` and confirm it.

    After typing, every element with class ``ralign`` is scanned and the
    first one whose text equals ``station`` is clicked (presumably these are
    the autocomplete suggestions -- verify against the live page).
    """
    from selenium.webdriver.common.by import By

    box = driver.find_element(By.ID, input_id)
    box.clear()
    box.click()
    box.send_keys(station)
    for candidate in driver.find_elements(By.CLASS_NAME, 'ralign'):
        if candidate.text == station:
            candidate.click()
            break


def _first_text(node, path, default=''):
    """Return the first result of ``node.xpath(path)``, or ``default`` if empty."""
    found = node.xpath(path)
    return found[0] if found else default


def _parse_train_row(tr, color):
    """Convert one result ``<tr>`` into a list of display strings.

    Returns ``None`` when the row has fewer than 13 cells or no train number,
    so irregular rows are skipped instead of raising ``IndexError``.
    """
    tds = tr.xpath('./td')
    if len(tds) < 13:
        return None
    head = tds[0]
    train_no = _first_text(head, './div/div[1]/div/a/text()')
    if not train_no:
        return None
    from_station = _first_text(head, './div/div[2]/strong[1]/text()')
    to_station = _first_text(head, './div/div[2]/strong[2]/text()')
    from_time = _first_text(head, './div/div[3]/strong[1]/text()')
    to_time = _first_text(head, './div/div[3]/strong[2]/text()')
    total_time = _first_text(head, './div/div[4]/strong[1]/text()')
    time_desc = _first_text(head, './div/div[4]/span/text()')

    # Cells 1..10 are the seat columns, in the same order as the table header
    # used by the caller; cell 11 is not used and cell 12 is the remark.
    seats = []
    for td in tds[1:11]:
        seats += getSeatInfo(td)

    link = tds[12].xpath('./a')
    remark = (link[0] if link else tds[12]).xpath('./text()')

    return [
        color.blue(train_no),
        color.green(from_station) + '\n' + color.red(to_station),
        color.green(from_time) + '\n' + color.red(to_time),
        total_time + '\n' + color.dim(time_desc),
    ] + seats + remark


def getTicketInfo(start, end, date):
    """Query 12306 through headless Chrome and return the result rows.

    The function opens the 12306 index page, fills in the departure station,
    arrival station and travel date, triggers the search, and parses the
    resulting page with lxml.

    :param start: departure station name as typed into the page.
    :param end: arrival station name as typed into the page.
    :param date: travel date string typed into the date box
        (the script's prompt shows the form ``2020-07-15``).
    :return: a list of rows; each row is a list of strings (train number,
        stations, times, duration, the ten seat columns, remark). The first
        four entries are wrapped by ``Colored`` methods.
    """
    from selenium.webdriver.common.by import By

    color = Colored()
    options = webdriver.ChromeOptions()
    options.add_argument('headless')  # run without a visible browser window
    # The user agent must be passed as a ``user-agent=`` switch; a bare
    # string is not interpreted as a user agent by Chrome.
    options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(r'https://www.12306.cn/index/')
        time.sleep(5)  # crude wait for the page widgets to load

        _pick_station(driver, 'fromStationText', start)
        _pick_station(driver, 'toStationText', end)

        # The date box is read-only; drop the attribute so it can be typed into.
        date_box = driver.find_element(By.ID, 'train_date')
        driver.execute_script('arguments[0].removeAttribute("readonly")', date_box)
        date_box.clear()
        date_box.send_keys(date)

        # Click the search button through JavaScript.
        home = driver.current_window_handle
        search = driver.find_element(By.ID, 'search_one')
        driver.execute_script("arguments[0].click();", search)

        # The results open in a new window; give it a moment to appear
        # before closing the original one and switching over.
        deadline = time.time() + 10
        while len(driver.window_handles) < 2 and time.time() < deadline:
            time.sleep(0.2)
        others = [h for h in driver.window_handles if h != home]
        if others:
            driver.close()
            driver.switch_to.window(others[-1])
        time.sleep(5)  # crude wait for the result table to render
        page = driver.page_source
    finally:
        # quit() ends the whole browser session; close() alone would leave
        # the Chrome/chromedriver processes running.
        driver.quit()

    # Parse the page; only every second <tr> is treated as a train row.
    selector = etree.HTML(page)
    trs = selector.xpath('/html/body/div[8]/div[7]/table/tbody[1]/tr')
    info = []
    for tr in trs[::2]:
        row = _parse_train_row(tr, color)
        if row is not None:
            info.append(row)
    return info
| 107 | + |
| 108 | + |
if __name__ == '__main__':
    # Script entry point: read "<from> <to> <date>" from stdin, query the
    # tickets and print them as a table.
    print("请输入查询车次(示例:重庆 成都 2020-07-15)")
    # split() without an argument collapses repeated, leading and trailing
    # whitespace, so "重庆  成都 2020-07-15 " is still accepted.
    query = input().split()
    if len(query) == 3:
        ticketInfo = getTicketInfo(query[0], query[1], query[2])
        table = PrettyTable()
        table.valign = 'm'
        table.field_names = ['车次', '出发站/终到站', '出发时间/到达时间', '历时', '特等座', '一等座',
                             '二等座', '软卧', '一等卧', '动卧', '二等卧', '软座', '硬座', '无座', '备注']
        width = len(table.field_names)
        for info in ticketInfo:
            # Rows whose length does not match the header are skipped.
            if len(info) == width:
                table.add_row(info)
                # Blank spacer row between trains for readability.
                table.add_row([" "] * width)
        print(table)
    else:
        print("输入信息有误......")
| 126 | + |
0 commit comments