diff --git a/xword_dl/downloader/voxdownloader.py b/xword_dl/downloader/voxdownloader.py
index 629efc6..7a25ded 100644
--- a/xword_dl/downloader/voxdownloader.py
+++ b/xword_dl/downloader/voxdownloader.py
@@ -16,3 +16,39 @@ def __init__(self, **kwargs):
 
     def guess_date_from_id(self, puzzle_id):
         self.date = datetime.datetime.strptime(puzzle_id.split('_')[1], '%Y%m%d')
+
+    def find_by_date(self, dt):
+        """Build every candidate puzzle URL for the given date.
+
+        The puzzle ID is <prefix><YYYYMMDD><suffix>, and neither the
+        prefix nor the suffix can be predicted from the date, so one URL
+        is generated per combination for the caller to try in turn.
+
+        Args:
+            dt: Date to look up; only its strftime() method is used.
+
+        Returns:
+            A list of candidate URLs, grouped by suffix in the order the
+            suffixes are listed below.
+        """
+        url_formatted_date = dt.strftime('%Y%m%d')
+        # The author varies by day, and their initials may or may not be
+        # present as a prefix.
+        prefixes = ['vox_', 'PBvox_', 'APvox_', 'WNvox_', 'AOKvox_',
+                    'JGvox_', 'AJRvox_', 'JGVox_']
+        # The suffix is always one of these; no pattern has been found.
+        suffixes = ['_1000', '_1100', '', '_1000%20(1)', '_1101']
+        self.get_and_add_picker_token()
+        candidate_urls = []
+        # Suffixes drive the outer loop: on average it is faster to
+        # search by the most frequent suffixes first.
+        for suffix in suffixes:
+            for prefix in prefixes:
+                # NOTE(review): self.id is overwritten on every pass, so
+                # it ends up naming the last candidate rather than the
+                # puzzle that is eventually downloaded -- confirm that
+                # nothing downstream relies on it.
+                self.id = prefix + url_formatted_date + suffix
+                candidate_urls.append(
+                    self.find_puzzle_url_from_id(self.id))
+        return candidate_urls
diff --git a/xword_dl/xword_dl.py b/xword_dl/xword_dl.py
index 063ad65..57e4686 100644
--- a/xword_dl/xword_dl.py
+++ b/xword_dl/xword_dl.py
@@ -43,12 +43,43 @@ def by_keyword(keyword, **kwargs):
             raise XWordDLException(
                 'Selection by date not available for {}.'.format(dl.outlet))
 
-    puzzle = dl.download(puzzle_url)
+    if isinstance(puzzle_url, list):
+        puzzle = get_puzzle_from_candidate_urls(dl, puzzle_url)
+    else:
+        puzzle = dl.download(puzzle_url)
     filename = dl.pick_filename(puzzle)
 
     return puzzle, filename
 
 
+def get_puzzle_from_candidate_urls(dl, candidate_urls):
+    """Return the first puzzle that downloads from a list of URLs.
+
+    Args:
+        dl: Downloader whose download(url) method is called per URL.
+        candidate_urls: URLs to try, in order.
+
+    Returns:
+        The first truthy value returned by dl.download().
+
+    Raises:
+        XWordDLException: If no URL yields a puzzle. The most recent
+            download error, if any, is chained as the cause.
+    """
+    last_error = None
+    for url in candidate_urls:
+        try:
+            puzzle = dl.download(url)
+        except Exception as err:
+            # A failed candidate is not fatal, but keep the error so
+            # the final exception can report why nothing was found.
+            last_error = err
+            continue
+        if puzzle:
+            return puzzle
+    raise XWordDLException('Crossword puzzle not found.') from last_error
+
+
 def by_url(url, **kwargs):
     supported_downloaders = [d[1] for d in
                              get_supported_outlets(command_only=False)
@@ -121,7 +152,7 @@ def parse_for_embedded_puzzle(url, **kwargs):
 def get_supported_outlets(command_only=True):
     all_classes = inspect.getmembers(sys.modules['xword_dl.downloader'],
                                      inspect.isclass)
-    dls = [d for d in all_classes if issubclass(d[1], 
+    dls = [d for d in all_classes if issubclass(d[1],
                                    downloader.BaseDownloader)]
 
     if command_only: