From f6b01c1176ffa835edb80d2c455a0b8110c90bba Mon Sep 17 00:00:00 2001 From: Sjoerd Arendsen Date: Thu, 8 Nov 2018 18:43:48 +0100 Subject: [PATCH] Guessit 3.0 (#2197) [change] guessit: upgraded to 3.0.3 --- .gitignore | 1 + dev-requirements.txt | 2 +- flexget/plugins/parsers/parser_guessit.py | 56 ++++++++++++++++++----- flexget/tests/test_seriesparser.py | 1 + flexget/utils/imdb.py | 2 +- requirements.in | 8 ++-- requirements.txt | 2 +- 7 files changed, 53 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 61ed7a8240..0a570dbd7b 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,4 @@ pip-selfcheck.json !variables.yml cached_resources/ pyvenv.cfg +.pytest_cache diff --git a/dev-requirements.txt b/dev-requirements.txt index 2531d66a09..5735e1354b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -14,5 +14,5 @@ pytest-runner pytest-cov==2.5.1 gitpython==2.1.5 codacy-coverage>=1.2.18 -pip-tools==1.9.0 +pip-tools==2.0.2 twine==1.11.0 diff --git a/flexget/plugins/parsers/parser_guessit.py b/flexget/plugins/parsers/parser_guessit.py index 3508ee246e..b1e3b59cc2 100644 --- a/flexget/plugins/parsers/parser_guessit.py +++ b/flexget/plugins/parsers/parser_guessit.py @@ -35,7 +35,17 @@ def _id_regexps_function(input_string, context): _id_regexps = Rebulk().functional(_id_regexps_function, name='regexpId', disabled=lambda context: not context.get('id_regexps')) -guessit_api = GuessItApi(rebulk_builder().rebulk(_id_regexps)) +def rules_builder(config): + rebulk = rebulk_builder(config) + rebulk.rebulk(_id_regexps) + return rebulk + + +guessit_api = GuessItApi() +guessit_api.configure( + options={}, + rules_builder=rules_builder, + force=True) def normalize_component(data): @@ -48,10 +58,22 @@ def normalize_component(data): class ParserGuessit(object): + SOURCE_MAP = { + 'Camera': 'cam', + 'HD Camera': 'cam', + 'HD Telesync': 'telesync', + 'Pay-per-view': 'ppv', + 'Digital TV': 'dvb', + 'Video on Demand': 'vod', + 'Analog HDTV': 'ahdtv', + 'Ultra HDTV': 'uhdtv', + 'HD Telecine': 'hdtc', + 'Web': 'web-dl' + } + @staticmethod def _guessit_options(options): settings = {'name_only': True, 'allowed_languages': ['en', 'fr'], 'allowed_countries': ['us', 'uk', 'gb']} - # 'clean_function': clean_value options['episode_prefer_number'] = not options.get('identified_by') == 'ep' if options.get('allow_groups'): options['expected_group'] = options['allow_groups'] @@ -79,17 +101,18 @@ def _proper_count(guessit_result): else: version -= 1 proper_count = guessit_result.get('proper_count', 0) - fastsub = 'fastsub' in normalize_component(guessit_result.get('other')) + fastsub = 'fast subtitled' in normalize_component(guessit_result.get('other')) return version + proper_count - (5 if fastsub else 0) - def _quality(self, guessit_result): - """Generate a FlexGet Quality from a guessit result.""" - resolution = normalize_component(guessit_result.get('screen_size')) + def _source(self, guessit_result): other = normalize_component(guessit_result.get('other')) - if not resolution and 'hr' in other: - resolution.append('hr') + source = self.SOURCE_MAP.get(guessit_result.get('source'), guessit_result.get('source')) + # special case + if source == 'web-dl' and 'Rip' in other: + source = 'webrip' + + source = normalize_component(source) - source = normalize_component(guessit_result.get('format')) if 'preair' in other: source.append('preair') if 'screener' in other: @@ -100,6 +123,17 @@ def _quality(self, guessit_result): if 'r5' in other: source.append('r5') + return source + + def _quality(self, guessit_result): + """Generate a FlexGet Quality from a guessit result.""" + resolution = normalize_component(guessit_result.get('screen_size')) + other = normalize_component(guessit_result.get('other')) + if not resolution and 'high resolution' in other: + resolution.append('hr') + + source = self._source(guessit_result) + codec = normalize_component(guessit_result.get('video_codec')) if '10bit' in normalize_component(guessit_result.get('video_profile')): codec.append('10bit') @@ -108,9 +142,9 @@ def _quality(self, guessit_result): audio_profile = normalize_component(guessit_result.get('audio_profile')) audio_channels = normalize_component(guessit_result.get('audio_channels')) # unlike the other components, audio can be a bit iffy with multiple codecs, so we limit it to one - if 'dts' in audio and any(hd in audio_profile for hd in ['HD', 'HDMA']): + if 'dts' in audio and any(hd in audio_profile for hd in ['hd', 'master audio']): audio = ['dtshd'] - elif '5.1' in audio_channels and any(dd in audio for dd in ['ac3', 'dolbydigital']): + elif '5.1' in audio_channels and any(dd in audio for dd in ['dolby digital']): audio = ['dd5.1'] # Make sure everything are strings (guessit will return lists when there are multiples) diff --git a/flexget/tests/test_seriesparser.py b/flexget/tests/test_seriesparser.py index b7e99fcfbd..80f945dc2a 100644 --- a/flexget/tests/test_seriesparser.py +++ b/flexget/tests/test_seriesparser.py @@ -110,6 +110,7 @@ def test_season_episode(self, parse): s = parse(name='Something', data='Something - Season2 Episode2') assert (s.season == 2 and s.episode == 2), 'failed to parse %s' % s + @pytest.mark.xfail(reason='Not supported in guessit, works for internal parser') def test_series_episode(self, parse): """SeriesParser: series X, episode Y""" s = parse(name='Something', data='Something - Series 2, Episode 2') diff --git a/flexget/utils/imdb.py b/flexget/utils/imdb.py index dd0c15fcff..f80bb717c6 100644 --- a/flexget/utils/imdb.py +++ b/flexget/utils/imdb.py @@ -94,7 +94,7 @@ def smart_match(self, raw_name, single_match=True): parser = get_plugin_by_name('parsing').instance.parse_movie(raw_name) name = parser.name year = parser.year - if name == '': + if not name: log.critical('Failed to parse name from %s', raw_name) return None log.debug('smart_match name=%s year=%s' % (name, str(year))) diff --git a/requirements.in b/requirements.in index b7434bb96c..011a44c2ce 100644 --- a/requirements.in +++ b/requirements.in @@ -11,14 +11,12 @@ rpyc==3.3.0 jinja2~=2.10 # There is a bug in requests 2.4.0 where it leaks urllib3 exceptions requests~=2.16.3 -#Guessit requires python-dateutil<=2.5.2 -python-dateutil>=2.5.3 +python-dateutil jsonschema>=2.0 path.py>=8.1.1 pathlib>=1.0; python_version<'3.4' -guessit<=2.1.4 -# Rebulk changes how guessit works higher than 0.8.2 -rebulk==0.9.0 +guessit==3.0.3 +rebulk>=0.9.0 apscheduler>=3.2.0 terminaltables>=3.1.0 colorclass>=2.2.0 diff --git a/requirements.txt b/requirements.txt index 9b8e9b2751..289a0c8e55 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ flask-restful==0.3.6 flask-restplus==0.10.1 flask==0.12.2 future==0.16.0 -guessit==2.1.4 +guessit==3.0.3 html5lib==0.999999999 idna==2.5 # via requests itsdangerous==0.24 # via flask