Skip to content

Commit d46ece2

Browse files
committed
Implement get_class_attributes()
1 parent bc031ae commit d46ece2

File tree

1 file changed

+23
-4
lines changed

1 file changed

+23
-4
lines changed

app/scrapers/sis_scraper.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,26 @@ async def get_class_description(
142142
async def get_class_attributes(session: aiohttp.ClientSession, term: str, crn: str):
    """
    Fetches the "Attributes" tab of a class details page and extracts the
    attribute labels it lists.

    A GET request is sent to the SIS getSectionAttributes endpoint with the
    given term and CRN as query parameters; raise_for_status() is called on
    the response before its body is read. The text of every
    <span class="attribute-text"> element in the returned HTML is collected,
    with leading and trailing whitespace stripped.

    Returned data format is as follows:
    [
        "Attribute 1",
        "Attribute 2",
        "Attribute 3",
        ...
    ]
    """
    url = "https://sis9.rpi.edu/StudentRegistrationSsb/ssb/searchResults/getSectionAttributes"
    params = {"term": term, "courseReferenceNumber": crn}

    # Only the network read needs the open response; parsing happens on the
    # already-downloaded text.
    async with session.get(url, params=params) as response:
        response.raise_for_status()
        page_html = await response.text()

    document = bs4.BeautifulSoup(page_html, "html5lib")
    attribute_spans = document.find_all("span", {"class": "attribute-text"})
    return [span.text.strip() for span in attribute_spans]
148165

149166

150167
async def get_class_restrictions(session: aiohttp.ClientSession, term: str, crn: str):
@@ -199,20 +216,22 @@ async def process_class_details(
199216
# print(
200217
# f"Processing class: {class_entry['subject']} {class_entry['courseNumber']} - {class_entry['sequenceNumber']}"
201218
# )
219+
202220
# Fetch class details not included in main class details
221+
# TODO: Only fetch necessary details if class data is already in course_data
203222
term = class_entry["term"]
204223
crn = class_entry["courseReferenceNumber"]
205224
async with asyncio.TaskGroup() as tg:
206225
description_task = tg.create_task(get_class_description(session, term, crn))
207-
# attributes_task = tg.create_task(get_class_attributes(session, term, crn))
226+
attributes_task = tg.create_task(get_class_attributes(session, term, crn))
208227
# restrictions_task = tg.create_task(get_class_restrictions(session, term, crn))
209228
# prerequisites_task = tg.create_task(get_class_prerequisites(session, term, crn))
210229
# corequisites_task = tg.create_task(get_class_corequisites(session, term, crn))
211230
# crosslists_task = tg.create_task(get_class_crosslists(session, term, crn))
212231

213232
# Wait for tasks to complete and get results
214233
description_data = description_task.result()
215-
# attributes_data = attributes_task.result()
234+
attributes_data = attributes_task.result()
216235
# restrictions_data = restrictions_task.result()
217236
# prerequisites_data = prerequisites_task.result()
218237
# corequisites_data = corequisites_task.result()
@@ -228,7 +247,7 @@ async def process_class_details(
228247
"corequisite": [],
229248
"prerequisite": [],
230249
"crosslist": [],
231-
"attributes": [],
250+
"attributes": attributes_data,
232251
"restrictions": {
233252
"major": [],
234253
"not_major": [],
@@ -348,7 +367,7 @@ async def get_term_data(
348367
print(f"Fetching subject list for term: {term}")
349368
async with aiohttp.ClientSession() as session:
350369
subjects = await get_subjects(session, term)
351-
print(f"Found {len(subjects)} subjects for term: {term}")
370+
print(f"Processing {len(subjects)} subjects for term: {term}")
352371

353372
# Stores all course data for the term
354373
all_course_data = {}

0 commit comments

Comments
 (0)