Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: DaRealFreak/saucenao
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v1.0.6
Choose a base ref
...
head repository: DaRealFreak/saucenao
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
  • 10 commits
  • 10 files changed
  • 2 contributors

Commits on Jun 2, 2020

  1. [TASK] add additional function to retrieve results from BinaryIO objects instead of just file paths, related to #5

    DaRealFreak committed Jun 2, 2020
    Copy the full SHA
    88899d0 View commit details

Commits on Jun 3, 2020

  1. Copy the full SHA
    4fe9a7e View commit details
  2. Copy the full SHA
    21a58b9 View commit details
  3. Copy the full SHA
    64e2e81 View commit details
  4. Copy the full SHA
    93f6265 View commit details
  5. Copy the full SHA
    a538acd View commit details

Commits on Jun 4, 2020

  1. Copy the full SHA
    88d7023 View commit details

Commits on Jun 9, 2020

  1. Copy the full SHA
    a096c1c View commit details

Commits on Apr 29, 2021

  1. Copy the full SHA
    501de21 View commit details

Commits on May 27, 2021

  1. Merge pull request #7 from DaRealFreak/dependabot/add-v2-config-file

    Upgrade to GitHub-native Dependabot
    DaRealFreak authored May 27, 2021
    Copy the full SHA
    28a1abc View commit details
Showing with 197 additions and 48 deletions.
  1. +7 −0 .github/dependabot.yml
  2. +7 −1 README.md
  3. +1 −1 saucenao/__version__.py
  4. +2 −4 saucenao/http.py
  5. +32 −27 saucenao/saucenao.py
  6. +11 −7 saucenao/worker.py
  7. +7 −7 tests/test_http.py
  8. +72 −0 tests/test_saucenao.py
  9. +1 −1 tests/test_version.py
  10. +57 −0 tests/test_worker.py
7 changes: 7 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
version: 2
updates:
- package-ecosystem: pip
directory: "/"
schedule:
interval: daily
open-pull-requests-limit: 10
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -48,16 +48,22 @@ python usage.py --dir [--databases] [--minimum-similarity] [--combine-api-types]

you can also use it to get the gathered information for your own script:
```
# check with file names
filtered_results = saucenao.check_file(file_name='test.jpg')
# or with streams/byte objects
filtered_results = saucenao.check_file_object(io.BytesIO(b'\x00'))
```

or get a generator object for a batch of files using the Worker class (all parameters work here too):
```
from saucenao import Worker
results = Worker(directory='directory', files=('test.jpg', 'test2.jpg')).run()
results = Worker(directory='directory', files=('test.jpg', 'test2.jpg', io.BytesIO(b'\x00'))).run()
```

the worker automatically differentiates between file names and BinaryIO objects,
so you can simply pass both types at the same time.

## Running the tests
In the tests folder you can run each unittest individually.
The test cases should be self-explanatory.
2 changes: 1 addition & 1 deletion saucenao/__version__.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@

__title__ = 'SauceNAO'
__description__ = 'Small module to work with SauceNAO locally'
__version__ = '1.0.6'
__version__ = '1.1.0'
__url__ = 'https://github.com/DaRealFreak/saucenao'
__author__ = 'DaRealFreak'
__author_email__ = 'steffen.keuper@web.de'
6 changes: 2 additions & 4 deletions saucenao/http.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import time

import requests

@@ -13,11 +12,10 @@
STATUS_CODE_REPEAT = 3


def verify_status_code(request_response: requests.Response, file_name: str) -> tuple:
def verify_status_code(request_response: requests.Response) -> tuple:
"""Verify the status code of the post request to the search url and raise exceptions if the code is unexpected
:type request_response: requests.Response
:type file_name: str
:return:
"""
if request_response.status_code == 200:
@@ -36,7 +34,7 @@ def verify_status_code(request_response: requests.Response, file_name: str) -> t
elif request_response.status_code == 403:
raise InvalidOrWrongApiKeyException("Invalid or wrong API key")
elif request_response.status_code == 413:
msg = "Payload too large, skipping file: {0:s}".format(file_name)
msg = "Payload too large, skipping file"
return STATUS_CODE_SKIP, msg
else:
msg = "Unknown status code: {0:d}".format(request_response.status_code)
59 changes: 32 additions & 27 deletions saucenao/saucenao.py
Original file line number Diff line number Diff line change
@@ -6,8 +6,7 @@
import os
import re
import time
from mimetypes import MimeTypes
from typing import Generator
from typing import Generator, BinaryIO, Iterable

import requests
from bs4 import BeautifulSoup as Soup
@@ -95,10 +94,9 @@ class SauceNao(object):
CONTENT_AUTHOR_KEY = 'Creator'
CONTENT_CHARACTERS_KEY = 'Characters'

mime = None
logger = None

def __init__(self, directory, databases=SauceNaoDatabase.All, minimum_similarity=65, combine_api_types=False,
def __init__(self, directory='', databases=SauceNaoDatabase.All, minimum_similarity=65, combine_api_types=False,
api_key=None, is_premium=False, exclude_categories='', move_to_categories=False,
use_author_as_category=False, output_type=API_HTML_TYPE, start_file=None, log_level=logging.ERROR,
title_minimum_similarity=90):
@@ -146,7 +144,6 @@ def __init__(self, directory, databases=SauceNaoDatabase.All, minimum_similarity

self.previous_status_code = None

self.mime = MimeTypes()
logging.basicConfig(level=log_level)
self.logger = logging.getLogger("saucenao_logger")

@@ -160,29 +157,39 @@ def check_file(self, file_name: str) -> list:
:return:
"""
self.logger.info("checking file: {0:s}".format(file_name))
file_path = os.path.join(self.directory, file_name)
with open(file_path, 'rb') as file_object:
return self.check_file_object(file_object)

def check_file_object(self, file_content: BinaryIO) -> list:
"""Check the passed file content for results on SauceNAO
:type file_content: bytes
:return:
"""
if self.combine_api_types:
result = self.__check_image(file_name, self.API_HTML_TYPE)
result = self.__check_image(file_content, self.API_HTML_TYPE)
sorted_results = self.parse_results_json(result)

additional_result = self.__check_image(file_name, self.API_JSON_TYPE)
file_content.seek(0)
additional_result = self.__check_image(file_content, self.API_JSON_TYPE)
additional_sorted_results = self.parse_results_json(additional_result)
sorted_results = self.__merge_results(sorted_results, additional_sorted_results)
else:
result = self.__check_image(file_name, self.output_type)
result = self.__check_image(file_content, self.output_type)
sorted_results = self.parse_results_json(result)

filtered_results = self.__filter_results(sorted_results)
return filtered_results

def __get_http_data(self, file_path: str, output_type: int):
def __get_http_data(self, file_object: BinaryIO, output_type: int):
"""Prepare the http relevant data(files, headers, params) for the given file path and output type
:param file_path:
:param file_object:
:param output_type:
:return:
"""
with open(file_path, 'rb') as file_object:
files = {'file': file_object.read()}
files = {'file': file_object.read()}

headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
@@ -195,8 +202,7 @@ def __get_http_data(self, file_path: str, output_type: int):
}

params = {
'file': file_path,
'Content-Type': self.mime.guess_type(file_path),
'file': file_object,
# parameters taken from form on main page: https://saucenao.com/
'url': None,
'frame': 1,
@@ -211,19 +217,17 @@ def __get_http_data(self, file_path: str, output_type: int):

return files, params, headers

def __check_image(self, file_name: str, output_type: int) -> str:
"""Check the possible sources for the given file
def __check_image(self, file_object: BinaryIO, output_type: int) -> str:
"""Check the possible sources for the given file object
:type output_type: int
:type file_name: str
:type file_object: typing.BinaryIO
:return:
"""
file_path = os.path.join(self.directory, file_name)

files, params, headers = self.__get_http_data(file_path=file_path, output_type=output_type)
files, params, headers = self.__get_http_data(file_object=file_object, output_type=output_type)
link = requests.post(url=self.SEARCH_POST_URL, files=files, params=params, headers=headers)

code, msg = http.verify_status_code(link, file_name)
code, msg = http.verify_status_code(link)

if code == http.STATUS_CODE_SKIP:
self.logger.error(msg)
@@ -235,7 +239,7 @@ def __check_image(self, file_name: str, output_type: int) -> str:
"Received an unexpected status code (message: {msg}), repeating after 10 seconds...".format(msg=msg)
)
time.sleep(10)
return self.__check_image(file_name, output_type)
return self.__check_image(file_object, output_type)
else:
raise UnknownStatusCodeException(msg)
else:
@@ -302,7 +306,8 @@ def parse_results_json(text: str) -> list:
return sorted(results, key=lambda k: float(k['header']['similarity']), reverse=True)

def __filter_results(self, sorted_results) -> list:
"""Return results with a similarity bigger or the same as the defined similarity from the arguments (default 65%)
"""Return results with a similarity bigger or the same as the defined similarity from the arguments
(default 65%)
:type sorted_results: list|tuple|Generator
:return:
@@ -317,11 +322,11 @@ def __filter_results(self, sorted_results) -> list:
return filtered_results

@staticmethod
def get_content_value(results, key: str):
def get_content_value(results: Iterable, key: str):
"""Return the first match of Material in content
multiple sites have a categorisation which SauceNao utilizes to provide it in the content section
:type results: list|tuple|Generator
:type results: Iterable
:type key: str
:return:
"""
@@ -333,11 +338,11 @@ def get_content_value(results, key: str):
return ''

@staticmethod
def get_title_value(results, key: str):
def get_title_value(results: Iterable, key: str):
"""Return the first match of Material in the title section
SauceNAO provides the authors name in the title section f.e. if provided by the indexed entry
:type results: list|tuple|Generator
:type results: Iterable
:type key: str
:return:
"""
18 changes: 11 additions & 7 deletions saucenao/worker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import time
from typing import BinaryIO, Union, Iterable

try:
from titlesearch import get_similar_titles
@@ -15,12 +16,12 @@ class Worker(SauceNao):
Worker class for checking a list of files
"""

def __init__(self, files, *args, **kwargs):
def __init__(self, files: Iterable[Union[BinaryIO, str]], *args, **kwargs):
"""
initializing function
:type files: list|tuple|Generator
:param args:
:type files: Iterable
:type args:
:param kwargs:
"""
super().__init__(*args, **kwargs)
@@ -34,7 +35,10 @@ def run(self):
for file_name in self.files:
start_time = time.time()

filtered_results = self.check_file(file_name)
if isinstance(file_name, BinaryIO):
filtered_results = self.check_file_object(file_name)
else:
filtered_results = self.check_file(file_name)

if not filtered_results:
self.logger.info('No results found for image: {0:s}'.format(file_name))
@@ -79,7 +83,7 @@ def files(self):
return self.complete_file_list
return self.complete_file_list

def __get_category(self, results):
def __get_category(self, results: Union[Iterable]):
"""retrieve the category of the checked image based which can be either
the content of the image or the author of the image
@@ -104,11 +108,11 @@ def __get_category(self, results):
# take the first category
return categories[0]

def __move_to_categories(self, file_name: str, results):
def __move_to_categories(self, file_name: str, results: Iterable):
"""Check the file for categories and move it to the corresponding folder
:type file_name: str
:type results: list|tuple|Generator
:type results: Iterable
:return: bool
"""
category = self.__get_category(results)
14 changes: 7 additions & 7 deletions tests/test_http.py
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@ def test_status_code_ok(self, mock):
:return:
"""
mock.get(self.dummy_url, status_code=200)
status_code, msg = verify_status_code(request_response=requests.get(self.dummy_url), file_name='tmp')
status_code, msg = verify_status_code(request_response=requests.get(self.dummy_url))
self.assertEqual(status_code, STATUS_CODE_OK)

@requests_mock.mock()
@@ -34,7 +34,7 @@ def test_status_code_skip(self, mock):
:return:
"""
mock.get(self.dummy_url, status_code=413)
status_code, msg = verify_status_code(request_response=requests.get(self.dummy_url), file_name='tmp')
status_code, msg = verify_status_code(request_response=requests.get(self.dummy_url))
self.assertEqual(status_code, STATUS_CODE_SKIP)

@requests_mock.mock()
@@ -44,7 +44,7 @@ def test_status_code_repeat(self, mock):
:return:
"""
mock.get(self.dummy_url, text='', status_code=999)
status_code, msg = verify_status_code(request_response=requests.get(self.dummy_url), file_name='tmp')
status_code, msg = verify_status_code(request_response=requests.get(self.dummy_url))
self.assertEqual(status_code, STATUS_CODE_REPEAT)

@requests_mock.mock()
@@ -55,7 +55,7 @@ def test_status_code_api_key(self, mock):
"""
mock.get(self.dummy_url, text='', status_code=403)
with self.assertRaises(InvalidOrWrongApiKeyException) as _:
verify_status_code(request_response=requests.get(self.dummy_url), file_name='tmp')
verify_status_code(request_response=requests.get(self.dummy_url))

@requests_mock.mock()
def test_status_code_limit(self, mock):
@@ -65,17 +65,17 @@ def test_status_code_limit(self, mock):
"""
mock.get(self.dummy_url, text='limit of 150 searches reached', status_code=429)
with self.assertRaises(DailyLimitReachedException) as exception:
verify_status_code(request_response=requests.get(self.dummy_url), file_name='tmp')
verify_status_code(request_response=requests.get(self.dummy_url))
self.assertEqual(str(exception), 'Daily search limit for unregistered users reached')

mock.get(self.dummy_url, text='limit of 300 searches reached', status_code=429)
with self.assertRaises(DailyLimitReachedException) as exception:
verify_status_code(request_response=requests.get(self.dummy_url), file_name='tmp')
verify_status_code(request_response=requests.get(self.dummy_url))
self.assertEqual(str(exception), 'Daily search limit for basic users reached')

mock.get(self.dummy_url, status_code=429)
with self.assertRaises(DailyLimitReachedException) as exception:
verify_status_code(request_response=requests.get(self.dummy_url), file_name='tmp')
verify_status_code(request_response=requests.get(self.dummy_url))
self.assertEqual(str(exception), 'Daily search limit reached')


Loading