99
1010import aiohttp
1111import bs4
12+ from prereq_parser import parse_prereq
1213
1314OUTPUT_DATA_DIR = "data"
1415
@@ -326,12 +327,51 @@ async def get_class_restrictions(session: aiohttp.ClientSession, term: str, crn:
326327 return restrictions_data
327328
328329
def _prereq_rows_to_expression(
    rows: list[list[str]],
    subject_name_code_map: dict[str, str],
    crn: str,
) -> str:
    """
    Joins the cells of the prerequisite table into one expression string.

    Each entry of ``rows`` is the list of stripped cell texts of one table row.
    Cell 0 holds the "And"/"Or" connector, cell 1 marks an opening parenthesis,
    cell 4 is a subject name that is translated through
    ``subject_name_code_map``, cell 5 is paired with that subject, and cell 8
    marks a closing parenthesis. Cells 2 and 3 take precedence over cells 4 and
    5 whenever cell 2 is non-empty (presumably a test name and score - confirm
    against the SIS page).

    ``crn`` is only used in the diagnostic printed for unknown subject names.
    """
    tokens: list[str] = []
    for cells in rows:
        # The highest index read below is 8. Rows with fewer cells (header
        # rows built from <th>, spacer rows) carry no prerequisite data and
        # would otherwise raise IndexError.
        if len(cells) < 9:
            continue
        connector, open_paren, test, score, subject, number = cells[:6]
        close_paren = cells[8]

        if connector == "And":
            tokens.append("and")
        elif connector == "Or":
            tokens.append("or")
        if open_paren:
            tokens.append("(")
        if test:
            tokens += [test, score]
        else:
            code = subject_name_code_map.get(subject)
            if code is None:
                # Keep the raw subject name so the entry is not silently lost.
                print(f"Unknown department in CRN {crn}: {subject}")
                code = subject
            tokens += [code, number]
        if close_paren:
            tokens.append(")")

    # split()/join() collapses whitespace runs of any length, including the
    # gaps left behind by empty cells.
    expression = " ".join(" ".join(tokens).split())
    return expression.replace("( ", "(").replace(" )", ")")


async def get_class_prerequisites(
    session: aiohttp.ClientSession,
    term: str,
    crn: str,
    subject_name_code_map: dict[str, str],
):
    """
    Fetches and parses data from the "Prerequisites" tab of a class details page.

    The prerequisite table is flattened into a single expression string, which
    is then handed to ``parse_prereq``.

    Args:
        session: Client session used to issue the GET request.
        term: Term identifier sent as the ``term`` query parameter.
        crn: Course reference number sent as ``courseReferenceNumber``.
        subject_name_code_map: Maps subject names, as shown in the table, to
            subject codes.

    Returns:
        The result of ``parse_prereq(crn, expression)``, or an empty dict when
        the page lists no prerequisites or the expression cannot be parsed.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an error status.
    """
    url = "https://sis9.rpi.edu/StudentRegistrationSsb/ssb/searchResults/getSectionPrerequisites"
    params = {"term": term, "courseReferenceNumber": crn}
    async with session.get(url, params=params) as response:
        response.raise_for_status()
        text = await response.text()
    soup = bs4.BeautifulSoup(text, "html5lib")

    # Strip every cell so that padding or newlines inside a <td> are neither
    # mistaken for content nor break the "And"/"Or" comparison.
    rows = [
        [cell.text.strip() for cell in row.find_all("td")]
        for row in soup.find_all("tr")
    ]
    data = _prereq_rows_to_expression(rows, subject_name_code_map, crn)
    if not data:
        return {}

    try:
        return parse_prereq(crn, data)
    except Exception as e:
        # Best effort: one unparsable prerequisite must not abort the scrape.
        print(f"Error parsing prerequisites for CRN {crn} with data: {data} - {e}")
        return {}
335375
336376
337377async def get_class_corequisites (
@@ -480,7 +520,9 @@ async def process_class_details(
480520 description_task = tg .create_task (get_class_description (session , term , crn ))
481521 attributes_task = tg .create_task (get_class_attributes (session , term , crn ))
482522 restrictions_task = tg .create_task (get_class_restrictions (session , term , crn ))
483- # prerequisites_task = tg.create_task(get_class_prerequisites(session, term, crn))
523+ prerequisites_task = tg .create_task (
524+ get_class_prerequisites (session , term , crn , subject_name_code_map )
525+ )
484526 corequisites_task = tg .create_task (
485527 get_class_corequisites (session , term , crn , subject_name_code_map )
486528 )
@@ -490,7 +532,7 @@ async def process_class_details(
490532 description_data = description_task .result ()
491533 attributes_data = attributes_task .result ()
492534 restrictions_data = restrictions_task .result ()
493- # prerequisites_data = prerequisites_task.result()
535+ prerequisites_data = prerequisites_task .result ()
494536 corequisites_data = corequisites_task .result ()
495537 # crosslists_data = crosslists_task.result()
496538
@@ -502,7 +544,7 @@ async def process_class_details(
502544 "course_detail" : {
503545 "description" : description_data ["description" ],
504546 "corequisite" : corequisites_data ,
505- "prerequisite" : [] ,
547+ "prerequisite" : prerequisites_data ,
506548 "crosslist" : [],
507549 "attributes" : attributes_data ,
508550 "restrictions" : restrictions_data ,
@@ -623,9 +665,7 @@ async def get_term_course_data(
623665 print (f"Processing { len (subjects )} subjects for term: { term } " )
624666
625667 # Create reverse mapping of subject names to codes
626- subject_name_code_map = {}
627- for subject in subjects :
628- subject_name_code_map [subject ["description" ]] = subject ["code" ]
668+ subject_name_code_map = await get_reverse_subject_map (session )
629669
630670 # Stores all course data for the term
631671 all_course_data = {}
@@ -730,7 +770,6 @@ async def main(start_year: int, end_year: int, seasons: list[str] = None) -> boo
730770
731771if __name__ == "__main__" :
732772 start_year = 2023
733- start_year = 2025
734773 end_year = 2025
735774 start_time = time .time ()
736775 asyncio .run (main (start_year , end_year ))
0 commit comments