forked from vaastav/Fantasy-Premier-League
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunderstat.py
68 lines (62 loc) · 2.31 KB
/
understat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import requests
import json
from bs4 import BeautifulSoup
import re
import codecs
import pandas as pd
import os
def get_data(url, timeout=30):
    """Download a page and return its non-empty ``<script>`` tags.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot block the caller forever.

    Returns:
        A list of the ``<script>`` tags found in the page whose ``contents``
        list is not empty.

    Raises:
        Exception: If the HTTP status code of the response is not 200.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception("Response was code " + str(response.status_code))
    parsed_html = BeautifulSoup(response.text, 'html.parser')
    # The original built this filtered list but then returned the unfiltered
    # result; return the filtered one so empty tags are really dropped.
    return [
        script
        for script in parsed_html.find_all('script')
        if script.contents
    ]
def _decode_json_parse_payload(expression):
    """Decode the argument of a ``JSON.parse('...')`` call found in *expression*.

    Returns the object produced by ``json.loads`` on the unescaped payload, or
    ``None`` when *expression* contains no ``JSON.parse('...')`` call.
    """
    matches = re.findall(r'JSON\.parse\(\'(.*)\'\)', expression)
    if not matches:
        return None
    # The payload is backslash-escaped; escape_decode turns the escapes back
    # into raw bytes, which are then read as UTF-8.
    # NOTE(review): the second positional argument of escape_decode appears to
    # be the error-handler name, not a codec; "hex" is kept as in the original
    # call - confirm whether it is intentional.
    decoded_content = codecs.escape_decode(matches[0], "hex")[0].decode('utf-8')
    return json.loads(decoded_content)


def get_epl_data(season=2020):
    """Fetch the Understat EPL league page and extract its embedded data.

    Every content node of every ``<script>`` tag is inspected for an
    assignment to ``var teamsData`` or ``var playersData`` whose right-hand
    side is a ``JSON.parse('...')`` call.

    Args:
        season: Value appended to the league URL; defaults to 2020, the
            season this function previously had hard-coded.

    Returns:
        A ``(teamData, playerData)`` tuple holding the decoded JSON of the two
        variables. A variable that is not found is returned as an empty dict.
    """
    scripts = get_data("https://understat.com/league/EPL/" + str(season))
    teamData = {}
    playerData = {}
    for script in scripts:
        for c in script.contents:
            # Split on the first '=' only, so an '=' inside the payload does
            # not truncate the right-hand side.
            split_data = c.split('=', 1)
            if len(split_data) < 2:
                continue
            data = split_data[0].strip()
            if data not in ('var teamsData', 'var playersData'):
                continue
            parsed = _decode_json_parse_payload(split_data[1])
            if parsed is None:
                # No JSON.parse('...') call here; keep the current value.
                continue
            if data == 'var teamsData':
                teamData = parsed
            else:
                playerData = parsed
    return teamData, playerData
def get_player_data(id):
    """Print the left-hand side of each script content node on a player page.

    The page ``https://understat.com/player/<id>`` is fetched through
    ``get_data``. For every content node of every returned script tag, the
    text before the first ``=`` is stripped of surrounding whitespace and
    printed. Nothing is returned.

    Args:
        id: Player identifier; converted with ``str`` and appended to the URL.
    """
    page_url = "https://understat.com/player/" + str(id)
    script_tags = get_data(page_url)
    # Placeholders: assigned here but not read anywhere in this function.
    groupsData, matchesData, shotsData = {}, {}, {}
    for tag in script_tags:
        for chunk in tag.contents:
            variable_name = chunk.split('=')[0].strip()
            print(variable_name)
def parse_epl_data(outfile_base):
    """Write the EPL team and player data returned by ``get_epl_data`` to CSV.

    For each team entry, its ``"history"`` records are written to
    ``understat_<title>.csv``, where spaces in the entry's ``"title"`` are
    replaced by underscores. The player data is written to
    ``understat_player.csv``. All files are written without the index column.

    Args:
        outfile_base: Directory that receives the CSV files. It is created
            (including parents) when it does not exist yet.
    """
    teamData, playerData = get_epl_data()
    # An empty base means "current directory"; os.makedirs('') would raise.
    if outfile_base:
        os.makedirs(outfile_base, exist_ok=True)
    for data in teamData.values():
        team_frame = pd.DataFrame.from_records(data["history"])
        team = data["title"].replace(' ', '_')
        team_frame.to_csv(
            os.path.join(outfile_base, 'understat_' + team + '.csv'),
            index=False,
        )
    player_frame = pd.DataFrame.from_records(playerData)
    player_frame.to_csv(
        os.path.join(outfile_base, 'understat_player.csv'),
        index=False,
    )
def main():
    """Script entry point: call ``get_player_data`` for player id 318."""
    # Alternative entry, currently disabled:
    # parse_epl_data('data/2019-20/understat')
    player_id = 318
    get_player_data(player_id)
# Run main() only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()