Skip to content

Commit e694e6c

Browse files
committed
Import old JSON to SQL conversion code
Taken from the CARPI-Data repository and modified so that it has no external dependencies. Code that is no longer relevant has been removed from this file entirely.
1 parent 0722806 commit e694e6c

File tree

1 file changed

+200
-0
lines changed

1 file changed

+200
-0
lines changed

app/scrapers/json_sql_converter.py

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
import json
2+
import os
3+
4+
5+
def fetch_query_results(connection, query):
    """Run *query* on *connection* and return every row it produces.

    Args:
        connection: Object exposing ``cursor()``. The cursor it returns must
            provide ``execute``, ``fetchall`` and ``close``.
        query: Statement handed unchanged to ``cursor.execute``.

    Returns:
        Whatever ``cursor.fetchall()`` returns.
    """
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        return cursor.fetchall()
    finally:
        # Release the cursor even when execute/fetchall raises.
        cursor.close()
10+
11+
12+
def get_year_sem(filename: str) -> tuple[int, str]:
    """Split a term-coded filename into its year and semester name.

    The first four characters are parsed as the year and the next two as a
    month code: ``01`` is Spring, ``05`` is Summer and ``09`` is Fall.

    Args:
        filename: Name starting with ``YYYYMM``; anything after the sixth
            character is ignored.

    Returns:
        ``(year, semester_name)``.

    Raises:
        ValueError: If the year prefix is not an integer or the month code
            is not one of the three known codes.
    """
    semesters = {"01": "Spring", "05": "Summer", "09": "Fall"}

    # Parse the year first so a bad prefix fails before the semester check.
    year = int(filename[0:4])
    month_code = filename[4:6]

    semester = semesters.get(month_code)
    if semester is None:
        raise ValueError(f"Invalid Semester: {month_code!r} in {filename!r}")
    return (year, semester)
22+
23+
24+
def insert_course_data(connection, data):
    """Build the bulk ``INSERT`` for the ``course`` table.

    Args:
        connection: Currently unused; kept so the signature stays stable
            until statement execution is restored.
        data: Mapping of department code to ``{"courses": {...}}``. Each
            course is keyed by ``"<dept> <number>"`` and holds
            ``course_name`` plus ``course_detail`` with ``description`` and
            ``credits`` (``min`` / ``max``).

    Returns:
        The SQL statement, or ``None`` when ``data`` holds no courses (an
        ``INSERT`` without rows would be invalid SQL).
    """

    def sql_str(value):
        # Escape backslashes first so the quote escapes added next are not
        # themselves doubled.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    rows = []
    for dept, dept_info in data.items():
        for course_key, course in dept_info["courses"].items():
            code_num = course_key.split(" ")[1]
            detail = course["course_detail"]
            credit_range = detail["credits"]
            values = (
                dept,
                code_num,
                course["course_name"],
                detail["description"],
                credit_range["min"],
                credit_range["max"],
            )
            rows.append("(" + ", ".join(sql_str(v) for v in values) + ")")

    if not rows:
        return None

    # NOTE: the original execute_query(connection, ...) call was removed with
    # the external DB helper; the statement is returned for the caller to run.
    return (
        "INSERT INTO course (dept, code_num, title, desc_text, credit_min, credit_max) VALUES "
        + ",".join(rows)
        + " ON DUPLICATE KEY UPDATE dept = dept;"
    )
41+
42+
43+
def insert_course_seats_data(connection, filename, data):
    """Build the bulk ``INSERT`` for the ``course_seats`` table.

    Seats are summed over all sections of a course, so each course yields
    one row for the term encoded in ``filename``.

    Args:
        connection: Currently unused; kept so the signature stays stable
            until statement execution is restored.
        filename: Term-coded file name, decoded with ``get_year_sem``.
        data: Mapping of department code to ``{"courses": {...}}``. Each
            course's ``course_detail["sections"]`` is a list of entries
            with ``registered`` and ``capacity``.

    Returns:
        The SQL statement, or ``None`` when ``data`` holds no courses.

    Raises:
        ValueError: Propagated from ``get_year_sem`` for a bad ``filename``.
    """

    def sql_str(value):
        # Escape backslashes first so the quote escapes added next are not
        # themselves doubled.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    sem_year, semester = get_year_sem(filename)

    rows = []
    for dept, dept_info in data.items():
        for course_key, course in dept_info["courses"].items():
            code_num = course_key.split(" ")[1]
            sections = course["course_detail"]["sections"]
            seats_filled = sum(section["registered"] for section in sections)
            seats_total = sum(section["capacity"] for section in sections)
            rows.append(
                f"({sem_year}, {sql_str(semester)}, {sql_str(dept)}, "
                f"{sql_str(code_num)}, {seats_filled}, {seats_total})"
            )

    if not rows:
        return None

    # NOTE: the original execute_query(connection, ...) call was removed with
    # the external DB helper; the statement is returned for the caller to run.
    return (
        "INSERT INTO course_seats (sem_year, semester, dept, code_num, seats_filled, seats_total) VALUES "
        + ",".join(rows)
        + ";"
    )
62+
63+
64+
def insert_professor_data(connection, filename, data):
    """Build the bulk ``INSERT`` for the ``professor`` table.

    One row is produced per instructor per section, for the term encoded in
    ``filename``; repeated rows are left to ``ON DUPLICATE KEY UPDATE``.

    Args:
        connection: Currently unused; kept so the signature stays stable
            until statement execution is restored.
        filename: Term-coded file name, decoded with ``get_year_sem``.
        data: Mapping of department code to ``{"courses": {...}}``. Each
            section in ``course_detail["sections"]`` has an ``instructor``
            iterable of names.

    Returns:
        The SQL statement, or ``None`` when no instructor rows exist.

    Raises:
        ValueError: Propagated from ``get_year_sem`` for a bad ``filename``.
    """

    def sql_str(value):
        # Escape backslashes first so the quote escapes added next are not
        # themselves doubled.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    sem_year, semester = get_year_sem(filename)

    rows = []
    for dept, dept_info in data.items():
        for course_key, course in dept_info["courses"].items():
            code_num = course_key.split(" ")[1]
            for section in course["course_detail"]["sections"]:
                for professor_name in section["instructor"]:
                    rows.append(
                        f"({sem_year}, {sql_str(semester)}, {sql_str(dept)}, "
                        f"{sql_str(code_num)}, {sql_str(professor_name)})"
                    )

    if not rows:
        return None

    # NOTE: the original execute_query(connection, ...) call was removed with
    # the external DB helper; the statement is returned for the caller to run.
    return (
        "INSERT INTO professor (sem_year, semester, dept, code_num, prof_name) VALUES "
        + ",".join(rows)
        + " ON DUPLICATE KEY UPDATE dept = dept;"
    )
83+
84+
85+
def insert_course_relationship(connection, data, year):
    """Build the bulk ``INSERT`` for cross-listed course relationships.

    Every crosslist and corequisite entry is also appended to
    ``crosslist.txt`` / ``corequisite.txt`` in the working directory. Only
    crosslists become SQL rows; corequisites are logged but not inserted.

    Args:
        connection: Currently unused; kept so the signature stays stable
            until statement execution is restored.
        data: Mapping of department code to ``{"courses": {...}}``. Each
            ``course_detail`` has ``crosslist`` (strings of the form
            ``"<dept> <number>"``) and ``corequisite`` iterables.
        year: Label written at the start of each log line.

    Returns:
        The SQL statement, or ``None`` when there are no crosslist rows.

    Raises:
        ValueError: If a crosslist entry does not split into exactly two
            space-separated parts.
    """

    def sql_str(value):
        # Escape backslashes first so the quote escapes added next are not
        # themselves doubled.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    def append_line(path, line):
        # Appended entry by entry so a malformed item is already on disk
        # when parsing it fails.
        with open(path, "a") as log_file:
            log_file.write(line)

    rows = []
    for dept, dept_info in data.items():
        for course_key, course in dept_info["courses"].items():
            code_num = course_key.split(" ")[1]
            detail = course["course_detail"]
            corequisites = detail["corequisite"]
            crosslists = detail["crosslist"]

            for crosslist in crosslists:
                append_line(
                    "crosslist.txt", f"{year} {dept}-{code_num} -> {crosslist}\n"
                )
                rel_dept, rel_code = crosslist.split(" ")
                rows.append(
                    f"({sql_str(dept)}, {sql_str(code_num)}, 'Cross', "
                    f"{sql_str(rel_dept)}, {sql_str(rel_code)})"
                )

            for corequisite in corequisites:
                append_line(
                    "corequisite.txt",
                    f"{year} {dept}-{code_num} -> {corequisite}\n",
                )
                # Corequisite rows are intentionally not emitted yet; the
                # original 'Coreq' insert was disabled.

    if not rows:
        return None

    # NOTE: the original execute_query(connection, ...) call was removed with
    # the external DB helper; the statement is returned for the caller to run.
    return (
        "INSERT INTO course_relationship (dept, code_num, relationship, rel_dept, rel_code_num) VALUES "
        + ",".join(rows)
        + " ON DUPLICATE KEY UPDATE dept = dept;"
    )
113+
114+
115+
def insert_course_attributes(connection, data):
    """Build the bulk ``INSERT`` for the ``course_attribute`` table.

    Args:
        connection: Currently unused; kept so the signature stays stable
            until statement execution is restored.
        data: Mapping of department code to ``{"courses": {...}}``. Each
            ``course_detail["attributes"]`` is an iterable of attribute
            values, one row each.

    Returns:
        The SQL statement, or ``None`` when no course has attributes.
    """

    def sql_str(value):
        # Escape backslashes first so the quote escapes added next are not
        # themselves doubled.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    rows = []
    for dept, dept_info in data.items():
        for course_key, course in dept_info["courses"].items():
            code_num = course_key.split(" ")[1]
            for attribute in course["course_detail"]["attributes"]:
                rows.append(
                    f"({sql_str(dept)}, {sql_str(code_num)}, {sql_str(attribute)})"
                )

    if not rows:
        return None

    # NOTE: the original execute_query(connection, ...) call was removed with
    # the external DB helper; the statement is returned for the caller to run.
    return (
        "INSERT INTO course_attribute (dept, code_num, attr) VALUES "
        + ",".join(rows)
        + " ON DUPLICATE KEY UPDATE dept = dept;"
    )
127+
128+
129+
def insert_course_restriction(connection, data):
    """Build the bulk ``INSERT`` for the ``course_restriction`` table.

    Each course's ``restrictions`` mapping is read under six keys:
    ``major`` / ``not_major``, ``level`` / ``not_level`` and
    ``classification`` / ``not_classification``. Plain keys become
    ``'Must be'`` rows and ``not_`` keys become ``'May not be'`` rows.

    Args:
        connection: Currently unused; kept so the signature stays stable
            until statement execution is restored.
        data: Mapping of department code to ``{"courses": {...}}`` whose
            ``course_detail["restrictions"]`` maps the keys above to
            iterables of values. Missing keys count as empty.

    Returns:
        The SQL statement, or ``None`` when no course has restrictions.
    """

    def sql_str(value):
        # Escape backslashes first so the quote escapes added next are not
        # themselves doubled.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    # (category, rule, key in the restrictions mapping), in emission order.
    rule_table = (
        ("Major", "Must be", "major"),
        ("Major", "May not be", "not_major"),
        ("Level", "Must be", "level"),
        ("Level", "May not be", "not_level"),
        ("Classification", "Must be", "classification"),
        ("Classification", "May not be", "not_classification"),
    )

    rows = []
    for dept, dept_info in data.items():
        for course_key, course in dept_info["courses"].items():
            code_num = course_key.split(" ")[1]
            restrictions = course["course_detail"]["restrictions"]
            # Each list is walked exactly once and every row uses its own
            # loop value (the old not_major loop inserted the stale major).
            for category, rule, key in rule_table:
                for value in restrictions.get(key, []):
                    rows.append(
                        f"({sql_str(dept)}, {sql_str(code_num)}, "
                        f"{sql_str(category)}, {sql_str(rule)}, {sql_str(value)})"
                    )

    if not rows:
        return None

    # NOTE: the original execute_query(connection, ...) call was removed with
    # the external DB helper; the statement is returned for the caller to run.
    return (
        "INSERT INTO course_restriction (dept, code_num, category, restr_rule, restriction) VALUES "
        + ",".join(rows)
        + " ON DUPLICATE KEY UPDATE dept = dept;"
    )
159+
160+
161+
def main():
    """Walk the ``data`` directory and load every JSON file found in it.

    Files in each directory are visited in reverse-sorted name order. The
    database connection and the per-table insert calls are still disabled
    (they relied on helpers removed from this module), so for now this only
    parses each file and prints progress.
    """
    # load_dotenv()
    print("ENV LOADED")
    # Read now so the settings are ready once create_connection is restored.
    # NOTE(review): "USERNAME" is commonly set by the OS itself (e.g. on
    # Windows) — confirm this is the intended variable for the DB user.
    db_settings = {
        "host": os.getenv("HOST"),
        "port": os.getenv("PORT"),
        "user": os.getenv("USERNAME"),
        "password": os.getenv("PASS"),
        "database": os.getenv("DB"),
    }

    print("Connecting to DB")
    # connection = create_connection(HOST, PORT, USER, PASS, DB)

    for dirpath, _dirnames, filenames in os.walk("data"):
        for file in sorted(filenames, reverse=True):
            print("File: " + file)
            # Open relative to the walked directory: the old hard-coded
            # "Data/" prefix broke on case-sensitive filesystems and for
            # files in subdirectories.
            with open(os.path.join(dirpath, file), encoding="utf-8") as f:
                data = json.load(f)

            print(f"  Inserting Course Data for {file}")
            # insert_course_data(connection, data)

            print(f"  Inserting Course Seats Data for {file}")
            # insert_course_seats_data(connection, file, data)

            print(f"  Inserting Professor Data for {file}")
            # insert_professor_data(connection, file, data)

            # print(f"  Inserting Course Relationship Data for {file}")
            # insert_course_relationship(connection, data, file)

            print(f"  Inserting Course Attributes for {file}")
            # insert_course_attributes(connection, data)

            print(f"  Inserting Course Restrictions for {file}")
            # insert_course_restriction(connection, data)
197+
198+
199+
# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)