Skip to content

extraction: refine img src url and fix table extraction bugs #1509

extraction: refine img src url and fix table extraction bugs

extraction: refine img src url and fix table extraction bugs #1509

Workflow file for this run

# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Tests

# Run on every push to master and on every pull request targeting master.
# `push` / `pull_request` must be nested under `on`, and each `branches`
# filter under its own event; otherwise they are parsed as unrelated
# (and duplicated) top-level keys.
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]
jobs:
  # Single job, fanned out over OS / Python version / test-mode combinations.
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining combinations finish when one of them fails.
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # https://github.com/actions/python-versions/blob/main/versions-manifest.json
        python-version: ["3.9", "3.11", "3.13"]  # "3.14-dev"
        # Two test modes per os/python combination: a minimal install without
        # proxy tests, and a full install (".[all]") with proxy tests enabled.
        # Values are quoted so they stay strings and can be compared with
        # 'true' / 'false' in the `if:` expressions below.
        env:
          - MINIMAL: "true"
            PROXY_TEST: "false"
          - MINIMAL: "false"
            PROXY_TEST: "true"
        include:
          # custom python versions
          # These entries define no `env`, so `matrix.env.*` is empty for them
          # and every step guarded by `matrix.env.MINIMAL == 'false'` is skipped.
          # Versions are quoted so YAML keeps them as strings (an unquoted
          # 3.10 would be read as the float 3.1).
          # NOTE(review): the hosted ubuntu-20.04 image appears to have been
          # retired by GitHub - confirm and consider a newer image that still
          # provides Python 3.8.
          - os: ubuntu-20.04
            python-version: "3.8"
          - os: macos-13
            python-version: "3.8"
          - os: macos-latest
            python-version: "3.10"
          - os: windows-latest
            python-version: "3.10"
          - os: ubuntu-latest
            python-version: "3.12"
    services:
      # SOCKS5 proxies used by the proxy tests. The image expression yields an
      # empty string on every runner other than ubuntu-latest, which is how
      # the service containers are switched off there.
      socks_proxy:
        image: ${{ matrix.os == 'ubuntu-latest' && 'serjs/go-socks5-proxy' || '' }}
        ports:
          - "1080:1080"
      # Same proxy image with username/password authentication, on host port 1081.
      socks_proxy_auth:
        image: ${{ matrix.os == 'ubuntu-latest' && 'serjs/go-socks5-proxy' || '' }}
        env:
          PROXY_USER: user
          PROXY_PASSWORD: pass
        ports:
          - "1081:1080"
    steps:
      # Check out first: hashFiles() in the cache key below only sees files
      # that already exist in the workspace.
      - uses: actions/checkout@v4
      # Python and pip setup
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Get pip cache dir
        id: pip-cache
        # bash on every OS so the redirect to $GITHUB_OUTPUT also works on the
        # Windows runner; this replaces the deprecated `::set-output` command.
        shell: bash
        run: |
          echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"
      - name: pip cache
        uses: actions/cache@v4
        with:
          path: ${{ steps.pip-cache.outputs.dir }}
          # Dependencies are installed from the package metadata (".[dev]" /
          # ".[all]"), so the packaging files are hashed as well as any
          # requirements.txt; patterns without a match are simply ignored.
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt', '**/pyproject.toml', '**/setup.py') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      # package setup
      - name: Install dependencies
        run: python -m pip install -e ".[dev]"
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
      # pycurl installation fix
      - name: Install packages required by pycurl
        if: ${{ matrix.env.MINIMAL == 'false' }}
        # -y keeps apt-get non-interactive regardless of the runner's apt defaults.
        run: |
          sudo apt-get update
          sudo apt-get install -y libcurl4-gnutls-dev libgnutls28-dev
          # alternatively: sudo apt-get install libcurl4-openssl-dev libssl-dev
      - name: Install full dependencies
        if: ${{ matrix.env.MINIMAL == 'false' }}
        run: python -m pip install -e ".[all]"
      # Type checking runs once only: full install on Python 3.13.
      - name: Type checking
        if: ${{ matrix.env.MINIMAL == 'false' && matrix.python-version == '3.13' }}
        run: |
          mypy -p trafilatura
      - name: Test with pytest
        run: |
          python -m pytest --cov=./ --cov-report=xml
        env:
          # Exposed to the test process; empty for the `include` combinations.
          PROXY_TEST: ${{ matrix.env.PROXY_TEST }}
      # coverage
      # Uploaded from a single combination (full install, Python 3.13).
      - name: Upload coverage to Codecov
        if: ${{ matrix.env.MINIMAL == 'false' && matrix.python-version == '3.13' }}
        uses: codecov/codecov-action@v4
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        with:
          fail_ci_if_error: true
          files: ./coverage.xml
          verbose: true