Skip to content

Commit d13ee57

Browse files
committed
Remove extraneous data from course descriptions
1 parent 4ac8448 commit d13ee57

File tree

1 file changed

+11
-30
lines changed

1 file changed

+11
-30
lines changed

app/scrapers/sis_api.py

Lines changed: 11 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -224,49 +224,30 @@ async def class_search(
224224

225225
async def get_class_description(
226226
session: aiohttp.ClientSession, term: str, crn: str
227-
) -> dict[str, str]:
227+
) -> str:
228228
"""
229229
Fetches and parses data from the "Course Description" tab of a class details page.
230230
231-
Returned data format is as follows:
232-
{
233-
"description": "This course provides an introduction to ...",
234-
"when_offered": "Spring, Summer, and Fall"
235-
}
231+
Returns a string containing the course description, without any additional fields
232+
such as "When Offered", "Credit Hours", "Prerequisite", etc.
236233
"""
237234
url = "https://sis9.rpi.edu/StudentRegistrationSsb/ssb/searchResults/getCourseDescription"
238235
params = {"term": term, "courseReferenceNumber": crn}
239236
async with session.get(url, params=params) as response:
240237
response.raise_for_status()
241238
raw_data = await response.text()
242239
raw_data = html_unescape(raw_data)
243-
description_data = {
244-
"description": "",
245-
"when_offered": "",
246-
}
247240
soup = bs4.BeautifulSoup(raw_data, "html5lib")
248241
description_tag = soup.find("section", {"aria-labelledby": "courseDescription"})
249-
description_text = [
250-
text.strip("\n").strip() for text in description_tag.text.split("\n")
242+
if description_tag is None:
243+
print(f"No description found for term and CRN: {term} - {crn}")
244+
return ""
245+
description_text_list = [
246+
text.strip() for text in description_tag.get_text(separator="\n").split("\n")
251247
]
252-
for text in description_text:
253-
print(text or "EMPTY")
254-
if text.startswith("When Offered:"):
255-
description_data["when_offered"] = text.replace("When Offered: ", "")
256-
# Skip useless fields that can be obtained elsewhere
257-
elif text.startswith("Credit Hours:"):
258-
continue
259-
elif text.startswith("Contact, Lecture or Lab Hours:"):
260-
continue
261-
elif text.startswith("Prerequisite:"):
262-
continue
263-
elif text.startswith("Corequisite:"):
264-
continue
265-
elif text.startswith("Cross Listed:"):
266-
continue
267-
else:
268-
description_data["description"] += text
269-
return description_data
248+
for text in description_text_list:
249+
if text != "":
250+
return text
270251

271252

272253
async def get_class_attributes(session: aiohttp.ClientSession, term: str, crn: str):

0 commit comments

Comments
 (0)