-
Notifications
You must be signed in to change notification settings - Fork 0
/
script_importer.py
86 lines (77 loc) · 2.47 KB
/
script_importer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# load the script data from the csv file and insert into the Dgraph database
import re
import csv
import sys
import json
from datetime import datetime
from python_graphql_client import GraphqlClient
# GraphQL client that every mutation below is sent through.
client = GraphqlClient(endpoint="http://localhost:8080/graphql")

# Path to the file containing the script data
file_path = 'data/scripts.csv'

# Bulk mutation: adds a whole list of lines in one request.
add_line_query = """
mutation AddLine($input: [AddLineInput!]!) {
addLine(input: $input) {
line {
id
}
}
}
"""

# Mutation used afterwards to attach the collected characters to an episode.
update_episode_query = """
mutation UpdateEpisode($input: UpdateEpisodeInput!) {
updateEpisode(input: $input) {
numUids
}
}
"""

# Character names are the all-uppercase words found in the 'Character' column;
# a cell may name several characters. Compiled once, used for every row.
character_pattern = re.compile(r'\b[A-Z]+\b')


def run_mutation(mutation, variables):
    """Execute a GraphQL mutation and abort the script if it reports errors.

    Args:
        mutation: The GraphQL mutation document to send.
        variables: The variables dictionary passed along with the mutation.

    Returns:
        The decoded response returned by the client.

    Exits the process with status 1 (after printing the errors) when the
    response contains an 'errors' entry.
    """
    data = client.execute(query=mutation, variables=variables)
    if 'errors' in data:
        print(data["errors"])
        sys.exit(1)
    return data


def insert_season(season, lines):
    """Insert the buffered lines of one season in a single bulk mutation.

    An empty buffer is skipped, so no request is sent before the first row
    or for a season in which every row was skipped.
    """
    if not lines:
        return
    run_mutation(add_line_query, {"input": lines})
    print("season", season, "inserted")


# Read lines for each episode and insert them in bulk, one season at a time
start = datetime.now()
count = 0
skipped = 0
character_episode_map = {}

with open(file_path, 'r', newline='') as file:
    reader = csv.DictReader(file)
    current_season = None  # season of the rows currently buffered
    batch = []
    for row in reader:
        season = float(row['Season'])
        if season != current_season:
            # Season boundary: send what was buffered for the previous season.
            insert_season(current_season, batch)
            batch = []
            current_season = season

        characters = character_pattern.findall(row['Character'])
        if not characters:
            skipped += 1
            continue

        for character in characters:
            batch.append({
                "text": row['Dialogue'],
                "character": {
                    "name": character
                },
                "episode": {
                    "identifier": row['SEID']
                }
            })
            # store the character-episode mapping for stitch-up (this is the
            # one inverse edge that needs to be updated this way)
            # NOTE(review): a character is appended once per line it speaks,
            # so an episode's list can contain repeats - confirm whether the
            # server needs them or whether they can be de-duplicated.
            character_episode_map.setdefault(row['SEID'], []).append({
                "name": character
            })
            # One line is inserted per (row, character) pair.
            count += 1

# The loop only flushes when the season changes, so the last season is still
# buffered here and must be inserted explicitly.
insert_season(current_season, batch)

print("updating episodes with characters...")
for identifier, episode_characters in character_episode_map.items():
    run_mutation(
        update_episode_query,
        {
            "input": {
                "filter": {"identifier": {"eq": identifier}},
                "set": {"characters": episode_characters},
            }
        },
    )

end = datetime.now()
elapsed = end - start
# total_seconds() covers the whole duration; the .microseconds attribute is
# only the sub-second component and is wrong for runs longer than a second.
print(count, "lines inserted in", elapsed.total_seconds() * 1000, "milliseconds")
print(skipped, "lines skipped (no character found)")