diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
new file mode 100644
index 0000000..5f5cb28
--- /dev/null
+++ b/.github/workflows/python-package.yml
@@ -0,0 +1,44 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python package
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest coverage
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        echo "Executing Unit Tests..."
+        coverage run -m pytest tests/
+
+        echo "Generating Report..."
+        coverage report -m
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 996751a..bdc0c54 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -71,7 +71,7 @@ First, a local project environment needs to be created, then the project's modul
    scripts/lint.sh
    ```
 
-4. Tag and version code changes. This will trigger a build in **Google Cloud Platform (GCP)** that will be associated with the pull request.
+4. Tag and version code changes.
 
    ```sh
    git tag -a "1.2.3" -m "Version 1.2.3"
diff --git a/examples/hello_world.py b/examples/hello_world.py
index 8fe7341..01c75e7 100644
--- a/examples/hello_world.py
+++ b/examples/hello_world.py
@@ -20,7 +20,7 @@ def quality_assurance(data):
     for col in data.columns:
         if data[col].dtype == 'object':
             data[col] = data[col].apply(lambda x: str(x) if isinstance(x, dict) else x)
-    
+
     data = data.drop_duplicates()
     return data
 
@@ -31,7 +31,7 @@ def quality_assurance(data):
 pipeline.configure({
     "data_source": "local",
     "data_source_path": "/path/to/directory",
-    "data_format" : "csv"
+    "data_format": "csv"
 })
 
 # Extract
diff --git a/examples/quickstart.py b/examples/quickstart.py
index 5af8929..56ccd63 100644
--- a/examples/quickstart.py
+++ b/examples/quickstart.py
@@ -1,6 +1,5 @@
 """Example SerenadeFlow Usage."""
 
-import pandas as pd
 from serenade_flow import pipeline
 
 
@@ -10,7 +9,7 @@
 pipeline.configure({
     "data_source": "local",
     "data_source_path": "/path/to/directory",
-    "data_format" : "csv"
+    "data_format": "csv"
 })
 
 # Extract
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index 9d7fd23..50500e9 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -11,7 +11,7 @@ def test_extact_local():
     pipeline.configure({
         "data_source": "local",
         "data_source_path": "/path/to/local/directory",
-        "data_format" : "csv"
+        "data_format": "csv"
     })
     data = pipeline.extract()
     assert len(data) == 3
@@ -23,7 +23,7 @@ def test_extact_remote():
     pipeline.configure({
         "data_source": "remote",
         "data_source_path": "http://path/to/storage/bucket",
-        "data_format" : "csv"
+        "data_format": "csv"
     })
     data = pipeline.extract()
     assert data == {}
@@ -35,7 +35,7 @@ def test_load():
     pipeline.configure({
         "data_source": "local",
         "data_source_path": "http://path/to/storage/bucket",
-        "data_format" : "csv"
+        "data_format": "csv"
     })
     data = pipeline.extract()
     assert pipeline.load(data, "output") == "Data loaded successfully"
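
The lint and test steps introduced by the workflow above can also be run locally before opening a pull request. Below is a minimal sketch, assuming the commands are run from the repository root in an environment where `flake8`, `pytest`, and `coverage` can be installed; it simply mirrors the workflow's steps rather than adding any project-specific tooling.

```sh
# Install the same tools the CI job uses
python -m pip install --upgrade pip
python -m pip install flake8 pytest coverage
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

# Fail on syntax errors and undefined names, then report remaining issues as warnings only
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

# Run the unit tests under coverage and print a per-module report
coverage run -m pytest tests/
coverage report -m
```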