def wordSearch3(start_index, G, dictionary, trigram_dict, words, frames, prefix="", visited=None):
    """Depth-first word search from ``start_index`` with trigram early stopping.

    The letter stored at ``start_index`` is appended to ``prefix``; if
    ``dictionary.search`` accepts the resulting string and it is not yet in
    ``words``, it is appended to ``words``. Every neighbour of the node is
    then tried:

    * already-visited neighbours are ignored;
    * an unvisited neighbour whose extended string is not a dictionary
      prefix is recorded in ``frames`` as a dead end;
    * otherwise the search recurses into the neighbour, unless the extended
      string has three or more letters and its last three letters are not
      found in ``trigram_dict`` (early stop, no frame recorded).

    Args:
        start_index: Index of the node to visit.
        G: Board object providing ``get_node_val(index)`` and
            ``get_neighbours(index)``.
        dictionary: Object providing ``search(word)`` and ``is_prefix(text)``.
        trigram_dict: Object providing ``search(trigram)``.
        words: List collecting the words found; mutated in place.
        frames: List collecting ``(path, text, words_snapshot)`` tuples for
            dead-end steps; mutated in place.
        prefix: Letters accumulated on the path before ``start_index``.
        visited: Node indices already on the path. The list passed in is
            extended with ``start_index``; ``None`` starts a fresh path.

    Returns:
        The ``frames`` list that was passed in.
    """
    if visited is None:
        visited = []

    prefix += G.get_node_val(start_index)
    visited.append(start_index)

    if dictionary.search(prefix) and prefix not in words:
        words.append(prefix)

    for n in G.get_neighbours(start_index):
        # A node may be used only once per path; skipping here also avoids a
        # pointless prefix lookup for neighbours that can never be followed.
        if n in visited:
            continue

        candidate = prefix + G.get_node_val(n)

        if not dictionary.is_prefix(candidate):
            # only add meaningful frames
            dead_end_path = visited.copy()
            dead_end_path.append(n)
            frames.append((dead_end_path, candidate, words.copy()))
            continue

        # NOTE(review): the lookup uses the trailing three letters exactly as
        # they come from the board -- confirm that trigram_dict stores its
        # entries in the same letter case and is meant to cover trigrams at
        # any position in a word, not only word-initial ones.
        if len(candidate) >= 3 and not trigram_dict.search(candidate[-3:]):
            continue

        # The callee appends the neighbour's letter and index itself, so it
        # receives the current prefix and its own copy of the path.
        wordSearch3(start_index=n,
                    G=G,
                    dictionary=dictionary,
                    trigram_dict=trigram_dict,
                    frames=frames,
                    prefix=prefix,
                    visited=visited.copy(),
                    words=words)

    return frames


def solveBoard(n, G, dictionary, words):
    """Run ``wordSearch`` from every node index in ``range(n * n)``."""
    for i in range(n * n):
        wordSearch(i, G, dictionary, words)


def solveBoard2(n, G, dictionary, words, frames):
    """Run ``wordSearch2`` from every node index in ``range(n * n)``."""
    for i in range(n * n):
        wordSearch2(i, G, dictionary, words, frames)


def solveBoard3(n, G, dictionary, trigram_dict, words, frames):
    """Run ``wordSearch3`` from every node index in ``range(n * n)``."""
    for i in range(n * n):
        wordSearch3(i, G, dictionary, trigram_dict, words, frames)
@@ -99,6 +145,6 @@ def createDictionary(filepath, dictionary): font_scale = 2 # frames_data = [[0, 1, 4, 9], [1, 2, 5, 9]] # JUST FOR TESTING -solveBoard2(4, G, dictionary, words, frames) +solveBoard3(4, G, dictionary, trigram_dict, words, frames) rendered_frames = draw_frames(frames, img_dims, lett_per_row, thickness, font_face, font_scale, board_letters, font_face) create_mp4(rendered_frames, img_dims) \ No newline at end of file diff --git a/vocabulary/trigram_table.txt b/vocabulary/trigram_table.txt new file mode 100644 index 0000000..b9cdd87 --- /dev/null +++ b/vocabulary/trigram_table.txt @@ -0,0 +1,776 @@ +ABL +ABO +ABR +ABS +ACC +ACH +ACR +ACT +ADD +ADM +ADV +AER +AFF +AFR +AFT +AGA +AGE +AGO +AHE +AIR +ALI +ALL +ALM +ALO +ALR +ALS +ALT +ALW +AM +AME +AMO +AND +ANG +ANI +ANO +ANS +ANY +APA +APP +APR +ARE +ARG +ARM +ARR +ART +ASI +ASK +ASL +ATE +ATM +ATT +AUD +AUG +AUT +AVE +AVO +AWA +CAK +CAL +CAM +CAN +CAP +CAR +CAS +CAT +CAU +CEN +CER +CHA +CHE +CHI +CHO +CHI +CHU +CIR +CIT +CLA +CLE +CLI +CLO +COA +COF +COI +COL +COM +CON +COO +COP +COR +COS +COU +COV +COW +CRE +CRI +CRO +CRY +CUP +CUR +CUS +CUT +BAB +BAC +BAD +BAG +BAL +BAN +BAS +BAT +BE +BEA +BEC +BED +BEE +BEF +BEG +BEH +BEL +BES +BET +BEY +BIC +BIG +BIK +BIL +BIR +BIT +BLA +BLE +BLO +BLU +BOA +BOD +BON +BOO +BOR +BOT +BOU +BOX +BOY +BRA +BRE +BRI +BRO +BUI +BUR +BUS +BUT +BUY +BY +EAC +EAR +EAS +EAT +EDU +EFF +EGG +EIG +EIT +ELE +ELS +EMP +END +ENE +ENG +ENJ +ENO +ENT +EQU +ERR +ESP +EUR +EVE +EXA +EXC +EXE +EXP +EYE +DAD +DAI +DAM +DAN +DAR +DAT +DAU +DAY +DEA +DEC +DEE +DEF +DEG +DEL +DEM +DEN +DEP +DES +DET +DEV +DIC +DID +DIE +DIF +DIG +DIN +DIR +DIS +DIV +DO +DOC +DOE +DOG +DOL +DON +DOO +DOU +DOW +DOZ +DRA +DRE +DRI +DRO +DRU +DRY +DUE +DUR +DUS +DUT +GAI +GAM +GAR +GAS +GAT +GAV +GEN +GER +GET +GIR +GIV +GLA +GO +GOD +GOI +GON +GOO +GOT +GOV +GRA +GRE +GRO +GUA +GUE +GUI +GUN +FAC +FAI +FAL +FAM +FAR +FAS +FAT +FEA +FEB +FED +FEE +FEL +FEW +FIE +FIF +FIG +FIL +FIN +FIR +FIS +FIV +FIX +FLA +FLE 
+FLI +FLO +FLY +FOL +FOO +FOR +FOU +FRA +FRE +FRI +FRO +FRU +FUL +FUN +FUT +I +ICE +IDE +IF +IMA +IMM +IMP +IN +INC +IND +INF +INS +INT +INV +IRO +IS +ISL +IT +ITS +HAB +HAD +HAI +HAL +HAN +HAP +HAR +HAS +HAT +HAV +HE +HEA +HEI +HEL +HER +HI +HID +HIG +HIL +HIM +HIR +HIS +HIT +HOL +HOM +HON +HOP +HOR +HOS +HOT +HOU +HOW +HUM +HUN +HUR +HUS +KEE +KEP +KEY +KIC +KIL +KIN +KIT +KNE +KNI +KNO +KAB +KAD +KAI +KAK +KAN +KAR +KAS +KAT +KAU +KAW +KAY +KAZ +KEA +KED +KEF +KEG +KEN +KES +KET +KEV +KIB +KIE +KIF +KIG +KIK +KIM +KIN +KIS +KIT +KIV +KOC +KON +KOO +KOS +KOT +KOU +KOV +KOW +KUN +KYI +KAC +KAD +KAG +KAI +KAJ +KAK +KAN +KAP +KAR +KAT +KAY +KE +KEA +KED +KEE +KEM +KEN +KES +KET +KID +KIG +KIL +KIN +KIS +KOD +KOM +KON +KOO +KOR +KOS +KOT +KOU +KOV +KUC +KUM +KUS +KY +KYS +KAM +KAR +KAT +KEA +KEC +KEE +KEI +KEV +KEW +KEX +KIC +KIG +KIN +KO +KOB +KOI +KON +KOO +KOR +KOS +KOT +KOV +KOW +KUM +KBJ +K'C +KCT +KF +KFF +KFT +KH +KIL +KKA +KLD +KN +KNC +KNE +KNL +KPE +KPI +KPP +KR +KRA +KRD +KTH +KUR +KUT +KVE +KWN +JAN +JAP +JOB +JOI +JUD +JUL +JUM +JUN +JUS +QUA +QUE +QUI +PAC +PAG +PAI +PAP +PAR +PAS +PAT +PAY +PEA +PEN +PEO +PER +PHO +PIC +PIE +PIN +PIP +PLA +PLE +POC +POI +POL +POO +POP +POS +POT +POU +POW +PRA +PRE +PRI +PRO +PUB +PUL +PUP +PUR +PUS +PUT +SAD +SAF +SAI +SAL +SAM +SAN +SAT +SAV +SAW +SAY +SCE +SCH +SCI +SCO +SEA +SEC +SEE +SEL +SEN +SEP +SER +SET +SEV +SEX +SHA +SHE +SHI +SHO +SHU +SIC +SID +SIG +SIL +SIM +SIN +SIS +SIT +SIX +SIZ +SKI +SKY +SLE +SLI +SLO +SMA +SME +SMI +SMO +SNO +SO +SOA +SOC +SOF +SOI +SOL +SOM +SON +SOO +SOR +SOU +SPA +SPE +SPI +SPO +SPR +SQU +STA +STE +STI +STO +STR +STU +STY +SUB +SUC +SUD +SUF +SUG +SUM +SUN +SUP +SUR +SWA +SWE +SWI +SWU +SYS +RAC +RAD +RAI +RAN +RAP +RAT +REA +REC +RED +REF +REG +REL +REM +REP +REQ +RES +RET +RIC +RID +RIG +RIN +RIS +RIV +ROA +ROC +ROD +ROL +ROO +ROS +ROU +ROW +RUL +RUN +RUS +UNA +UNC +UND +UNI +UNL +UNT +UP +UPO +US +USE +USU +TAB +TAK +TAL +TAS +TAU +TAX +TEA +TEE +TEL +TEM +TEN +TER +TES +THA 
+THE +THI +THO +THR +THU +TIC +TIE +TIL +TIM +TIR +TIT +TO +TOD +TOG +TOL +TOM +TON +TOO +TOP +TOR +TOT +TOU +TOW +TRA +TRE +TRI +TRO +TRU +TRY +TUE +TUR +TV +TWE +TWI +TWO +TYI +TYP +VAL +VAR +VEG +VER +VIE +VIL +VIS +VOI +VOL +VOT +VAI +VAK +VAL +VAN +VAR +VAS +VAT +VAV +VAY +VE +VEA +VED +VEE +VEI +VEL +VEN +VER +VES +VET +VHA +VHE +VHI +VHO +VHY +VID +VIF +VIL +VIN +VIR +VIS +VIT +VIV +VOK +VOM +VON +VOO +VOR +VOU +VRI +VRO +VMA +YAR +YEA +YEL +YEN +YES +YET +YOU +ZER \ No newline at end of file