+name: Build and upload to PyPI
+ release:
+ types: [published]
+ publish:
+ runs-on: ubuntu-22.04
+ permissions:
+ id-token: write # mandatory for PyPI trusted publishing
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version-file: scraper/pyproject.toml
+ architecture: x64
+ - name: Build packages
+ working-directory: scraper
+ run: |
+ pip install -U pip build
+ python -m build --sdist --wheel
+ - name: Upload to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1.8
+ with:
+ packages-dir: scraper/dist/
+ - name: Build and push Docker image
+ uses: openzim/docker-publish-action@v10
+ with:
+ image-name: openzim/freecodecamp
+ tag-pattern: /^v([0-9.]+)$/
+ latest-on-tag: true
+ restrict-to: openzim/freecodecamp
+ registries: ghcr.io
+ credentials:
+ GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }}
+ repo_description: auto
+ repo_overview: auto
+name: Tests
+ pull_request:
+ push:
+ branches:
+ - main
+ run-tests:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version-file: scraper/pyproject.toml
+ architecture: x64
+ - name: Install dependencies (and project)
+ working-directory: scraper
+ run: |
+ pip install -U pip
+ pip install -e .[test,scripts]
+ - name: Run the tests
+ working-directory: scraper
+ run: inv coverage --args "-vvv"
+ - name: Upload coverage report to codecov
+   uses: codecov/codecov-action@v3
+   with:
+     # The test step above runs with `working-directory: scraper`, so this is
+     # where coverage reports must be searched for.
+     directory: scraper
+     token: ${{ secrets.CODECOV_TOKEN }}
+ build_python:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version-file: scraper/pyproject.toml
+ architecture: x64
+ - name: Ensure we can build Python targets
+ working-directory: scraper
+ run: |
+ pip install -U pip build
+ python3 -m build --sdist --wheel
+ build_docker:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v3
+ - name: Ensure we can build the Docker image
+ run: |
+ docker build -t testimage .
+ - name: Ensure we can start the Docker image
+ run: |
+ docker run --rm testimage --version
+# ignore all vscode, this configuration is not maintained
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..8bc3161
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,11 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [Unreleased]
+### Added
+- Initial version, supporting only Javascript challenges
new file mode 100644
index 0000000..5cfc18d
--- /dev/null
@@ -0,0 +1,45 @@
+# Contributing
+This project adheres to openZIM's [Contribution Guidelines](https://github.com/openzim/overview/wiki/Contributing)
+and openZIM's [Bootstrap conventions](https://github.com/openzim/_python-bootstrap/wiki/) especially its
+## Guidelines
+- Don't take assigned issues. Comment if those get stale.
+- If your contribution is far from trivial, open an issue to discuss it first.
+- Ensure your code passes `inv lintall` and `inv checkall`
+## Configure your environment
+Development environment is meant to be managed by `hatch` and commits can be checked with `pre-commit`.
+If not already installed on your machine, install them in your global environment:
+pip install -U hatch pre-commit
+Install precommit
+pre-commit install
+Go to scraper directory:
+cd scraper
+Start a hatch shell to run further commands:
+hatch shell
+Install/Update dependencies:
+pip install -U ".[dev]"
diff --git a/README.md b/README.md
index 79fe5b9..7f5a919 100644
--- a/README.md
+++ b/README.md
@@ -1,52 +1,66 @@
-# FCC on Zim
+# freeCodeCamp scraper
+This scraper downloads selected [freeCodeCamp](https://www.freecodecamp.org/) courses and puts them in a
+[ZIM](https://openzim.org) file, a clean and user friendly format for storing content for offline usage.
+[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
+[![PyPI version shields.io](https://img.shields.io/pypi/v/fcc2zim.svg)](https://pypi.org/project/fcc2zim/)
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/fcc2zim.svg)](https://pypi.org/project/fcc2zim/)
+## Architecture
This project consists of two major components:
-- Openzim - The scripts (python) that fetch the latest FCC curriculum and package it into a format client can read, as well as our zim builder
-- Client - A vite app configured to be consumed by a Zim reader.
+- `zimui` - A Vue.JS application specially crafted to:
+ - be embedded inside the ZIM and serve as main entry point (through compilation for offline usage with Vite)
+ - present FCC curriculum, including solving exercises
+ - be compatible with most ZIM readers
+- `scraper` - The Python tool that builds the FCC ZIM. It is responsible to:
+ - fetch FCC curriculum and package it into a proper format
+ - build the ZIM file, embedding the `zimui` application and the packaged curriculum
-## freeCodeCamp Zim build process
+## Dependencies
-This process can be broken down into 5 parts
+Aside from Node.JS and Python dependencies, which are managed, other binary dependencies come from Python [zimscraperlib](https://github.com/openzim/python-scraperlib/)
-1. Build the Vite client
-1. Fetch the latest curriculum from freeCodeCamp by downloading the latest release source archive
-1. "Prebuild" the curriculum for a selected langauge and set of courses. Copy to the client directory
-1. Build a Zim file of the resulting Vite application.
## Development
-#### Prerequsites
-- Node 20.x
-- Python 3
-This project comes with .devcontainer to help onboard new developers, with Node 20 and Python3 installed
-See: [`Makefile`](Makefile) for a full build process
+### Prerequisites
-## Building with Docker
+- Node 20.x
+- Python 3.11
-- `docker build -t openzim/fcc2zim .`
-- `docker run --rm -it -v /workspaces/openzim-freecodecamp/tmp:/tmp/fcc2zim openzim/fcc2zim all \
- --clientdir ./client/dist --outdir=./client/dist/fcc --outzim ./build/eng.zim \
- --language eng --tmpdir=/tmp/fcc2zim \
- --course=regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects \
- --name "fcc_en_javascript" --title "freeCodeCamp Javascript" --description "FCC Javascript Courses"
+### Running scraper locally
-## Course Options and Limitations
+You have to:
+- build the `zimui` frontend which will be embedded inside the ZIM (and redo it every time you make modifications to the `zimui`)
+- run the `scraper` to retrieve FCC curriculum and build the ZIM
-Currently this scraper only supports Javascript challenges. A list of courses is passed to the `prebuild` step as a comma seperated list of 'course slugs'.
+Sample commands:
+cd zimui
+yarn install
+yarn build
+cd ../scraper
+hatch run fcc2zim --language eng --course "regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects" --name "fcc_en_javascript" --title "freeCodeCamp Javascript" --description "FCC Javascript Courses"
-You can find a list of course slugs in the [freeCodeCamp curriculum folder](https://github.com/freeCodeCamp/freeCodeCamp/tree/main/curriculum/challenges/english/02-javascript-algorithms-and-data-structures)
+### Running scraper with Docker
+Run the official image (published on GHCR.io); the ZIM will be available in the `output` sub-folder of the current working directory.
-python3 openzim/fcc2zim prebuild --course=regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects --outdir=./client/dist/fcc --language eng --tmpdir=./tmp
+docker run --rm -it -v $(pwd)/output:/output ghcr.io/openzim/freecodecamp:latest --language eng --course "regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects" --name "fcc_en_javascript" --title "freeCodeCamp Javascript" --description "FCC Javascript Courses"
-# License
+## Course Options and Limitations
+Currently this scraper only supports Javascript challenges. A list of courses is passed to the scraper as a comma separated list of 'course slugs'.
+You can find a list of course slugs in the [freeCodeCamp curriculum folder](https://github.com/freeCodeCamp/freeCodeCamp/tree/main/curriculum/challenges/english/02-javascript-algorithms-and-data-structures)
-This repository is licensed under GPLv3, with the exception of the freeCodeCamp curriculum which is licensed under BSD 3 Clause (see LICENSE.fcc.md).
+In the Docker example above, see the `--course` argument: `regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects`
diff --git a/openzim/README.md b/openzim/README.md
deleted file mode 100644
index 90d294b..0000000
--- a/openzim/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# freecodecamp
-FreeCodeCamp.org scraper (to ZIM)
- "ara": "arabic",
- "cmn": "chinese",
- "lzh": "chinese-traditional",
- "eng": "english",
- "spa": "espanol",
- "deu": "german",
- "ita": "italian",
- "jpn": "japanese",
- "por": "portuguese",
- "ukr": "ukranian",
-lock = threading.Lock()
-creator = None
diff --git a/openzim/pypi-readme.rst b/openzim/pypi-readme.rst
deleted file mode 100644
index 9ba5d13..0000000
--- a/openzim/pypi-readme.rst
+++ /dev/null
@@ -1,71 +0,0 @@
-@TODO Update this for fcc2zim usage
-A scraper that downloads the whole JS course material for FCC
-(http://freecodecamp.org) and puts it into a locally browsable
-directory and then in a ZIM file (http://www.openzim.org), a clean and
-user friendly format for storing content for offline usage.
-.. code-block:: sh
- python-pip python-dev libxml2-dev libxslt-dev advancecomp jpegoptim pngquant p7zip-full gifsicle
-.. code-block:: sh
- brew install advancecomp jpegoptim pngquant p7zip gifsicle
-.. code-block:: sh
- fcc2zim
-By default (no argument), it runs all the steps: download, parse, export and zim.
-.. code-block:: sh
- -h --help Display this help message
- -y --wipe-db Do not wipe the DB during parse stage
- -F --force Redo step even if target already exist
- -l --languages= Comma-separated list of lang codes to filter export to (preferably ISO 639-1, else ISO 639-3)
- -f --formats= Comma-separated list of formats to filter export to (epub, html, pdf, all)
- -m --mirror= Use URL as base for all downloads.
- -r --rdf-folder= Don't download rdf-files.tar.bz2 and use extracted folder instead
- -e --static-folder= Use-as/Write-to this folder static HTML
- -z --zim-file= Write ZIM into this file path
- -t --zim-title= Set ZIM title
- -n --zim-desc= Set ZIM description
- -d --dl-folder= Folder to use/write-to downloaded ebooks
- -u --rdf-url= Alternative rdf-files.tar.bz2 URL
- -b --books= Execute the processes for specific books, separated by commas, or dashes for intervals
- -c --concurrency= Number of concurrent process for download and parsing tasks
- -x --zim-title= Custom title for the ZIM file
- -q --zim-desc= Custom description for the ZIM file
- --check Check dependencies
- --prepare Download & extract rdf-files.tar.bz2
- --parse Parse all RDF files and fill-up the DB
- --download Download ebooks based on filters
- --export Export downloaded content to zim-friendly static HTML
- --dev Exports *just* Home+JS+CSS files (overwritten by --zim step)
- --zim Create a ZIM file
diff --git a/openzim/.dockerignore b/scraper/.dockerignore
diff --git a/scraper/pyproject.toml b/scraper/pyproject.toml
new file mode 100644
index 0000000..9cda4db
--- /dev/null
+++ b/scraper/pyproject.toml
@@ -0,0 +1,223 @@
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+name = "fcc2zim"
+authors = [
+ { name = "Kiwix", email = "dev@kiwix.org" },
+keywords = ["fcc","freecodecamp","zim","kiwix","openzim","offline"]
+requires-python = ">=3.11"
+description = "Make ZIM files from freeCodeCamp courses"
+readme = "../README.md"
+license = {text = "GPL-3.0-or-later"}
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.11",
+ "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
+dependencies = [
+ "zimscraperlib==3.1.1",
+ "requests==2.31.0",
+ "PyYAML==6.0.1",
+dynamic = ["version"]
+scripts = [
+ "invoke==2.2.0",
+lint = [
+ "black==23.7.0",
+ "ruff==0.0.285",
+check = [
+ "pyright==1.1.323",
+test = [
+ "pytest==7.4.0",
+ "coverage==7.3.0",
+dev = [
+ "pre-commit==3.3.3",
+ "debugpy==1.6.7",
+ "fcc2zim[scripts]",
+ "fcc2zim[lint]",
+ "fcc2zim[test]",
+ "fcc2zim[check]",
+Homepage = "https://github.com/openzim/freecodecamp"
+Donate = "https://www.kiwix.org/en/support-us/"
+fcc2zim = "fcc2zim:entrypoint.main"
+path = "src/fcc2zim/__about__.py"
+features = ["dev"]
+features = ["scripts", "test"]
+run = "inv test --args '{args}'"
+run-cov = "inv test-cov --args '{args}'"
+report-cov = "inv report-cov"
+coverage = "inv coverage --args '{args}'"
+html = "inv coverage --html --args '{args}'"
+template = "lint"
+skip-install = false
+features = ["scripts", "lint"]
+black = "inv lint-black --args '{args}'"
+ruff = "inv lint-ruff --args '{args}'"
+all = "inv lintall --args '{args}'"
+fix-black = "inv fix-black --args '{args}'"
+fix-ruff = "inv fix-ruff --args '{args}'"
+fixall = "inv fixall --args '{args}'"
+features = ["scripts", "check"]
+pyright = "inv check-pyright --args '{args}'"
+all = "inv checkall --args '{args}'"
+line-length = 88
+target-version = ['py311']
+target-version = "py311"
+line-length = 88
+src = ["src"]
+select = [
+ "A", # flake8-builtins
+ # "ANN", # flake8-annotations
+ "ARG", # flake8-unused-arguments
+ # "ASYNC", # flake8-async
+ "B", # flake8-bugbear
+ # "BLE", # flake8-blind-except
+ "C4", # flake8-comprehensions
+ "C90", # mccabe
+ # "COM", # flake8-commas
+ # "D", # pydocstyle
+ # "DJ", # flake8-django
+ "DTZ", # flake8-datetimez
+ "E", # pycodestyle (default)
+ "EM", # flake8-errmsg
+ # "ERA", # eradicate
+ # "EXE", # flake8-executable
+ "F", # Pyflakes (default)
+ # "FA", # flake8-future-annotations
+ "FBT", # flake8-boolean-trap
+ # "FLY", # flynt
+ # "G", # flake8-logging-format
+ "I", # isort
+ "ICN", # flake8-import-conventions
+ # "INP", # flake8-no-pep420
+ # "INT", # flake8-gettext
+ "ISC", # flake8-implicit-str-concat
+ "N", # pep8-naming
+ # "NPY", # NumPy-specific rules
+ # "PD", # pandas-vet
+ # "PGH", # pygrep-hooks
+ # "PIE", # flake8-pie
+ # "PL", # Pylint
+ "PLC", # Pylint: Convention
+ "PLE", # Pylint: Error
+ "PLR", # Pylint: Refactor
+ "PLW", # Pylint: Warning
+ # "PT", # flake8-pytest-style
+ # "PTH", # flake8-use-pathlib
+ # "PYI", # flake8-pyi
+ "Q", # flake8-quotes
+ # "RET", # flake8-return
+ # "RSE", # flake8-raise
+ "RUF", # Ruff-specific rules
+ "S", # flake8-bandit
+ # "SIM", # flake8-simplify
+ # "SLF", # flake8-self
+ "T10", # flake8-debugger
+ "T20", # flake8-print
+ # "TCH", # flake8-type-checking
+ # "TD", # flake8-todos
+ "TID", # flake8-tidy-imports
+ # "TRY", # tryceratops
+ "UP", # pyupgrade
+ "W", # pycodestyle
+ "YTT", # flake8-2020
+ignore = [
+ # Allow non-abstract empty methods in abstract base classes
+ "B027",
+ # Allow use of date.today
+ "DTZ011",
+ # Remove flake8-errmsg since we consider they bloat the code and provide limited value
+ "EM",
+ # Allow boolean positional values in function calls, like `dict.get(... True)`
+ "FBT003",
+ # Ignore checks for possible passwords
+ "S105", "S106", "S107",
+ # Ignore warnings on subprocess.run / popen
+ "S603",
+ # Ignore complexity
+ "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",
+unfixable = [
+ # Don't touch unused imports
+ "F401",
+known-first-party = ["fcc2zim"]
+# add exceptions to B008 for fastapi.
+extend-immutable-calls = ["fastapi.Depends", "fastapi.Query"]
+ban-relative-imports = "all"
+# Tests can use magic values, assertions, and relative imports
+"tests/**/*" = ["PLR2004", "S101", "TID252"]
+minversion = "7.4"
+testpaths = ["tests"]
+pythonpath = [".", "src"]
+fcc2zim = ["src/fcc2zim"]
+tests = ["tests"]
+source_pkgs = ["fcc2zim"]
+branch = true
+parallel = true
+omit = [
+ "src/fcc2zim/__about__.py",
+exclude_lines = [
+ "no cov",
+ "if __name__ == .__main__.:",
+include = ["src", "tests", "tasks.py"]
+exclude = [".env/**", ".venv/**"]
+extraPaths = ["src"]
+pythonVersion = "3.11"
diff --git a/scraper/src/fcc2zim/__about__.py b/scraper/src/fcc2zim/__about__.py
new file mode 100644
index 0000000..4e8976a
--- /dev/null
+++ b/scraper/src/fcc2zim/__about__.py
@@ -0,0 +1 @@
+__version__ = "1.0.0-dev0"
diff --git a/client/.eslintignore b/scraper/src/fcc2zim/__init__.py
similarity index 100%
rename from client/.eslintignore
rename to scraper/src/fcc2zim/__init__.py
diff --git a/scraper/src/fcc2zim/__main__.py b/scraper/src/fcc2zim/__main__.py
new file mode 100644
index 0000000..85a0eb7
--- /dev/null
+++ b/scraper/src/fcc2zim/__main__.py
@@ -0,0 +1,4 @@
+from fcc2zim.entrypoint import main
+if __name__ == "__main__":
+ main()
diff --git a/openzim/fcc_48.png b/scraper/src/fcc2zim/assets/fcc_48.png
similarity index 100%
rename from openzim/fcc_48.png
rename to scraper/src/fcc2zim/assets/fcc_48.png
diff --git a/scraper/src/fcc2zim/build.py b/scraper/src/fcc2zim/build.py
new file mode 100644
index 0000000..549b14e
--- /dev/null
+++ b/scraper/src/fcc2zim/build.py
@@ -0,0 +1,91 @@
+import json
+from collections import OrderedDict
+from pathlib import Path
+from zimscraperlib.zim import Creator
+from fcc2zim.constants import Global
+def build_curriculum_redirects(curriculum_dist_dir: Path, fcc_lang: str):
+    """
+    Build the list of redirects from challenge URL to Vite hash URL
+
+    The Vite app uses its own router to navigate. We have a single HTML file, but we
+    need an URL for each challenge for the zim search to work.
+    This builds the list of redirects needed from the challenge URL to Vite hash URL.
+
+    Args:
+        curriculum_dist_dir: directory holding `curriculum/<fcc_lang>/index.json`
+            and one `<superblock>/<course>/_meta.json` file per course
+        fcc_lang: language directory name, also the top-level key of index.json
+
+    Returns:
+        An items view of `(redirect path, challenge title)` pairs, in insertion
+        order. When the same path appears more than once, the last title wins.
+    """
+    lang_dir = curriculum_dist_dir.joinpath("curriculum", fcc_lang)
+    # JSON is read explicitly as UTF-8 so the result does not depend on the
+    # locale's default encoding.
+    index_json_path = lang_dir.joinpath("index.json")
+    superblock_dict = json.loads(index_json_path.read_text(encoding="utf-8"))[
+        fcc_lang
+    ]
+
+    redirects = []
+    for superblock, course_list in superblock_dict.items():
+        for course in course_list:
+            meta_json_path = lang_dir.joinpath(superblock, course, "_meta.json")
+            challenges = json.loads(meta_json_path.read_text(encoding="utf-8"))[
+                "challenges"
+            ]
+            for challenge in challenges:
+                redirects.append(
+                    (
+                        f'{fcc_lang}/{superblock}/{course}/{challenge["slug"]}',
+                        challenge["title"],
+                    )
+                )
+
+    # Going through a mapping removes duplicate paths while keeping order
+    return OrderedDict(redirects).items()
+def build_command(
+    zimui_dist_dir: Path,
+    fcc_lang: str,
+    creator: Creator,
+    curriculum_dist_dir: Path,
+):
+    """Add the zimui files, the prebuilt curriculum and challenge redirects to the ZIM
+
+    Args:
+        zimui_dist_dir: directory whose files are added at the same relative path
+        fcc_lang: curriculum language, forwarded to build_curriculum_redirects
+        creator: ZIM creator receiving every item
+        curriculum_dist_dir: directory whose files are added under the `fcc/` prefix
+    """
+    Global.logger.info("Scraper: build phase starting")
+
+    # Add zimui files
+    for file in zimui_dist_dir.rglob("*"):
+        if file.is_dir():
+            continue
+        path = str(Path(file).relative_to(zimui_dist_dir))
+        Global.logger.debug(f"Adding {path} to ZIM")
+        creator.add_item_for(path, fpath=file)
+
+    # Add prebuild generated curriculum file
+    for file in curriculum_dist_dir.rglob("*"):
+        if file.is_dir():
+            continue
+        path = str(Path("fcc").joinpath(Path(file).relative_to(curriculum_dist_dir)))
+        Global.logger.debug(f"Adding {path} to ZIM")
+        creator.add_item_for(path, fpath=file)
+
+    for redir_slug, redir_title in build_curriculum_redirects(
+        curriculum_dist_dir=curriculum_dist_dir, fcc_lang=fcc_lang
+    ):
+        redirect_path = f"{redir_slug}"
+        # One "../" per path separator brings us back to the ZIM root
+        redirect_url = redir_slug.count("/") * "../" + f"index.html#{redir_slug}"
+        # NOTE(review): as written, this body only contains the title and never
+        # uses redirect_url - confirm the redirect markup is intact.
+        content = (
+            f"{redir_title}"
+            f''
+            f""
+        )
+        # The first fragment ends with a space so that the slug and "and title"
+        # are not glued together in the log output
+        Global.logger.debug(
+            f"Redirecting {redirect_path} to {redirect_url} for slug {redir_slug} "
+            f"and title {redir_title}",
+        )
+        creator.add_item_for(
+            redirect_path,
+            content=bytes(content, "utf-8"),
+            title=redir_title,
+            mimetype="text/html",
+            is_front=True,
+        )
+        # Example index.html#/english/regular-expressions/extract-matches
+
+    Global.logger.info("Scraper: build phase finished")
diff --git a/openzim/fcctozim/challenge.py b/scraper/src/fcc2zim/challenge.py
similarity index 90%
rename from openzim/fcctozim/challenge.py
rename to scraper/src/fcc2zim/challenge.py
index 32c88bc..2ccae30 100644
--- a/openzim/fcctozim/challenge.py
+++ b/scraper/src/fcc2zim/challenge.py
@@ -1,5 +1,4 @@
import pathlib
-from typing import Union
import yaml
@@ -15,14 +14,14 @@ def read_yaml_frontmatter(filename: pathlib.Path):
class Challenge:
- def __init__(self, fpath: Union[str, pathlib.Path]) -> None:
+ def __init__(self, fpath: str | pathlib.Path) -> None:
self.path = pathlib.Path(fpath)
self.course_slug = self.path.parent.stem
self.course_superblock = "-".join(self.path.parent.parent.stem.split("-")[1:])
self.language = self.path.parent.parent.parent.stem
self._frontmatter = None
- def id(self):
+ def identifier(self):
return str(self.frontmatter()["id"])
def title(self):
diff --git a/scraper/src/fcc2zim/constants.py b/scraper/src/fcc2zim/constants.py
new file mode 100644
index 0000000..ef2c2d0
--- /dev/null
+++ b/scraper/src/fcc2zim/constants.py
@@ -0,0 +1,32 @@
+import logging
+from zimscraperlib.logging import getLogger
+from fcc2zim.__about__ import __version__
+ "ara": "arabic",
+ "cmn": "chinese",
+ "lzh": "chinese-traditional",
+ "eng": "english",
+ "spa": "espanol",
+ "deu": "german",
+ "ita": "italian",
+ "jpn": "japanese",
+ "por": "portuguese",
+ "ukr": "ukranian",
+VERSION = __version__
+class Global:
+ # Holder for process-wide settings; both attributes are reassigned by set_debug().
+ # debug: current value of the debug flag (off by default)
+ debug = False
+ # logger: shared "fcc2zim" logger, created at INFO level until set_debug() runs
+ logger: logging.Logger = getLogger("fcc2zim", level=logging.INFO)
+def set_debug(*, debug: bool):
+    """Store the debug flag and recreate the shared logger at the matching level.
+
+    Args:
+        debug: when true the logger is rebuilt at DEBUG level, otherwise at INFO
+    """
+    Global.debug = debug
+    if Global.debug:
+        level = logging.DEBUG
+    else:
+        level = logging.INFO
+    # refresh logger to update log level
+    Global.logger = getLogger("fcc2zim", level=level)
diff --git a/scraper/src/fcc2zim/entrypoint.py b/scraper/src/fcc2zim/entrypoint.py
new file mode 100644
index 0000000..8623a8f
--- /dev/null
+++ b/scraper/src/fcc2zim/entrypoint.py
@@ -0,0 +1,165 @@
+import argparse
+import datetime
+import functools
+import os
+from zimscraperlib.constants import (
+from zimscraperlib.constants import (
+from fcc2zim.constants import FCC_LANG_MAP, VERSION, Global, set_debug
+from fcc2zim.scraper import Scraper
+def log_and_sys_exit(func):
+    """Decorator logging any unexpected exception and turning it into exit code 1.
+
+    Positional and keyword arguments are forwarded to `func` and its return value
+    is passed back to the caller, so the decorator is not limited to functions
+    that take no arguments.
+
+    Raises:
+        SystemExit: re-raised untouched when `func` asked for it, or raised with
+            code 1 (chained to the original exception) for any other Exception.
+    """
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except SystemExit:  # SystemExit has been asked for at lower level, simply do it
+            raise
+        except Exception as exc:
+            Global.logger.error(f"A fatal error occurred: {exc}")
+            Global.logger.exception(exc)
+            raise SystemExit(1) from exc
+
+    return wrapper
+def main():
+ """Command-line entry point: parse arguments, set log level, run the Scraper."""
+ parser = argparse.ArgumentParser(
+ prog="fcc2zim",
+ description="Scraper to create ZIM files from freeCodeCamp courses",
+ )
+ parser.add_argument(
+ "--course",
+ type=str,
+ help="Course or course list (separated by commas)",
+ required=True,
+ )
+ parser.add_argument(
+ "--language",
+ type=str,
+ help="Curriculum language",
+ required=True,
+ choices=FCC_LANG_MAP.keys(),
+ )
+ parser.add_argument(
+ "--name",
+ type=str,
+ help="ZIM name. Used as identifier and filename (date will be appended)",
+ required=True,
+ )
+ parser.add_argument(
+ "--title",
+ type=str,
+ # once Zimscraperlib > 3.1.1 is released, use constant from library
+ # instead of '30' magic number
+ help="Title of zim file (less than 30 chars)",
+ required=True,
+ )
+ parser.add_argument(
+ "--description",
+ type=str,
+ help=f"Description of ZIM file (less than {MAX_DESC_LENGTH} chars)",
+ required=True,
+ )
+ parser.add_argument(
+ "--long-description",
+ type=str,
+ help=f"Long description of ZIM file (less than {MAX_LONG_DESC_LENGTH} chars)",
+ )
+ parser.add_argument(
+ "--creator",
+ type=str,
+ help="Name of freeCodeCamp courses creator",
+ default="freeCodeCamp",
+ )
+ parser.add_argument(
+ "--publisher", type=str, help="Publisher of the zim file", default="OpenZIM"
+ )
+ parser.add_argument(
+ "--force",
+ help="Force a full reprocessing, not benefiting from any cached file",
+ action="store_true",
+ default=False,
+ )
+ parser.add_argument(
+ "--debug",
+ help="Enable verbose output",
+ action="store_true",
+ default=False,
+ )
+ # The three directory options below take their default from an environment
+ # variable when it is set, and fall back to a relative path otherwise
+ parser.add_argument(
+ "--output-dir",
+ type=str,
+ help="Output directory where zim file will be built",
+ default=os.getenv("OUTPUT_DIR", "../output"),
+ )
+ parser.add_argument(
+ "--build-dir",
+ type=str,
+ help="The build directory to hold temporary files during scraper operation",
+ default=os.getenv("BUILD_DIR", "../build"),
+ )
+ parser.add_argument(
+ "--zimui-dist-dir",
+ type=str,
+ help=(
+ "Directory containing Vite build output from the Zim UI Vue.JS application"
+ ),
+ default=os.getenv("ZIMUI_DIST_DIR", "../zimui/dist"),
+ )
+ parser.add_argument(
+ "--zim-file",
+ type=str,
+ help="ZIM file name (based on --name if not provided), could contain {period}"
+ " placeholder which will be replaced by _",
+ )
+ parser.add_argument(
+ "--zip-path",
+ help="Path to zip file containing FCC courses",
+ type=str,
+ )
+ parser.add_argument(
+ "--version",
+ help="Display scraper version and exit",
+ action="version",
+ version=f"fcc2zim {VERSION}",
+ )
+ args = parser.parse_args()
+ Global.logger.info(f"Starting fcc2zim {VERSION}")
+ # Rebuild the logger with the requested level before any scraping happens
+ set_debug(debug=args.debug)
+ # The do_* flags are read from environment variables rather than CLI options:
+ # each one is true only when its variable equals "true" (case-insensitive)
+ scraper = Scraper(
+ do_fetch=os.getenv("DO_FETCH", "False").lower() == "true",
+ do_prebuild=os.getenv("DO_PREBUILD", "False").lower() == "true",
+ do_build=os.getenv("DO_BUILD", "False").lower() == "true",
+ zimui_dist_dir=args.zimui_dist_dir,
+ output_dir=args.output_dir,
+ build_dir=args.build_dir,
+ language=args.language,
+ name=args.name,
+ title=args.title,
+ description=args.description,
+ long_description=args.long_description,
+ content_creator=args.creator,
+ publisher=args.publisher,
+ zim_file=args.zim_file,
+ force=args.force,
+ course_csv=args.course,
+ zip_path=args.zip_path,
+ start_date=datetime.date.today(),
+ )
+ scraper.run()
+ Global.logger.info("Scraper completed")
diff --git a/scraper/src/fcc2zim/fetch.py b/scraper/src/fcc2zim/fetch.py
new file mode 100644
index 0000000..b43f700
--- /dev/null
+++ b/scraper/src/fcc2zim/fetch.py
@@ -0,0 +1,36 @@
+import shutil
+import zipfile
+from pathlib import Path
+import requests
+from fcc2zim.constants import Global
+def fetch_command(zip_path: Path, curriculum_raw_dir: Path, *, force: bool):
+    """Download the freeCodeCamp source archive if needed and extract the curriculum
+
+    Args:
+        zip_path: location of the archive; reused when it already exists unless
+            `force` is set
+        curriculum_raw_dir: extraction directory, emptied before extraction
+        force: download the archive again even when `zip_path` exists
+
+    Raises:
+        requests.HTTPError: when the download answers with an HTTP error status
+    """
+    Global.logger.info("Scraper: fetch phase starting")
+    url = "https://github.com/freeCodeCamp/freeCodeCamp/archive/refs/heads/main.zip"
+
+    # Don't redownload the file if we already have it (it's a large file)
+    if force or not zip_path.exists():
+        Global.logger.debug(f"Download zip file to {zip_path}")
+        resp = requests.get(url, allow_redirects=True, timeout=5)
+        # Fail now on an HTTP error: otherwise the error body would be saved as
+        # the archive and reused by later runs, since an existing file is kept
+        resp.raise_for_status()
+        zip_path.write_bytes(resp.content)
+    else:
+        Global.logger.debug(f"Using existing zip file {zip_path}")
+
+    # Creating the directory first guarantees rmtree has something to remove;
+    # extractall below recreates it
+    curriculum_raw_dir.mkdir(parents=True, exist_ok=True)
+    shutil.rmtree(curriculum_raw_dir)
+
+    Global.logger.debug("Extracting files")
+    wanted_prefixes = (
+        "freeCodeCamp-main/curriculum/",
+        "freeCodeCamp-main/client/i18n/locales",
+    )
+    with zipfile.ZipFile(zip_path, "r") as zip_ref:
+        # Only the curriculum and the client locales are extracted
+        members = [
+            member
+            for member in zip_ref.namelist()
+            if member.startswith(wanted_prefixes)
+        ]
+        zip_ref.extractall(members=members, path=curriculum_raw_dir)
+        Global.logger.info(f"Extracted {len(members)} files")
+
+    Global.logger.info(f"Fetched curriculum into {curriculum_raw_dir}")
+    Global.logger.info("Scraper: fetch phase finished")
diff --git a/openzim/fcctozim/prebuild.py b/scraper/src/fcc2zim/prebuild.py
similarity index 52%
rename from openzim/fcctozim/prebuild.py
rename to scraper/src/fcc2zim/prebuild.py
index 8fe44f9..67e0a70 100644
--- a/openzim/fcctozim/prebuild.py
+++ b/scraper/src/fcc2zim/prebuild.py
@@ -1,17 +1,16 @@
import json
-import pathlib
import shutil
-from typing import List
+from pathlib import Path
-from fcctozim import FCC_LANG_MAP
-from fcctozim.challenge import Challenge
+from fcc2zim.challenge import Challenge
+from fcc2zim.constants import Global
def get_challenges_for_lang(tmp_path, language="english"):
- return pathlib.Path(tmp_path, language).rglob("*.md")
+ return Path(tmp_path, language).rglob("*.md")
-def update_index(path: pathlib.Path, superblock: str, slug: str, language="english"):
+def update_index(path: Path, superblock: str, slug: str, language="english"):
index_path = path.joinpath("index.json")
if not index_path.exists():
@@ -33,17 +32,15 @@ def update_index(path: pathlib.Path, superblock: str, slug: str, language="engli
-def write_locales_to_path(
- source_dir: pathlib.Path, outdir: pathlib.Path, language="english"
- shutil.copytree(source_dir, outdir / "locales" / language)
+def write_locales_to_path(source_dir: Path, curriculumdir: Path, language="english"):
+ shutil.copytree(source_dir, curriculumdir / "locales" / language)
def write_course_to_path(
- challenge_list: List[Challenge],
+ challenge_list: list[Challenge],
superblock: str,
course_slug: str,
- outdir: pathlib.Path,
+ curriculumdir: Path,
"""Writes the course to the chosen path.
@@ -53,11 +50,11 @@ def write_course_to_path(
Finally, we udpate the root index.json file with the course, which allows
us to render a page listing all available courses
- outdir.mkdir(parents=True, exist_ok=True)
+ curriculumdir.mkdir(parents=True, exist_ok=True)
meta = {"challenges": []}
for challenge in challenge_list:
- challenge_dest_path = outdir.joinpath(
+ challenge_dest_path = curriculumdir.joinpath(
challenge.course_superblock, challenge.course_slug
challenge_dest_path.mkdir(parents=True, exist_ok=True)
@@ -66,39 +63,48 @@ def write_course_to_path(
{"title": challenge.title(), "slug": challenge.path.stem}
- meta_path = outdir.joinpath(superblock, course_slug, "_meta.json")
+ meta_path = curriculumdir.joinpath(superblock, course_slug, "_meta.json")
meta_path.parent.mkdir(parents=True, exist_ok=True)
with open(meta_path, "w") as outfile:
json.dump(meta, outfile, indent=4)
# Create an index with a list of the courses
- update_index(outdir, superblock, course_slug, challenge_list[0].language)
+ update_index(curriculumdir, superblock, course_slug, challenge_list[0].language)
-def prebuild_command(arguments):
- """Writes out a structure of challenges to output dir:
+def prebuild_command(
+ course_csv: str,
+ fcc_lang: str,
+ curriculum_raw_dir: Path,
+ curriculum_dist_dir: Path,
+ """Transform raw data in curriculum_raw_dir into pre-built data in
+ curriculum_dist_dir
- /output_dir/index.json => { 'english': {'superblock': ['basic-javascript'] } }
- /output_dir/english///_meta.json
+ E.g. if lang in english:
+ - curriculum_dist_dir/index.json
+ => { 'english': {'superblock': ['basic-javascript'] } }
+ - curriculum_dist_dir/english///_meta.json
=> { challenges: [{slug, title}] }
- /output_dir/english///{slug}.md
+ - curriculum_dist_dir/english///{slug}.md
- course_list_str = str(arguments.course)
- outdir = pathlib.Path(arguments.outdir)
- lang = FCC_LANG_MAP[arguments.language]
- tmpdir = arguments.tmpdir or "./tmp"
- curriculum_dir = pathlib.Path(
- tmpdir, "curriculum", "freeCodeCamp-main", "curriculum", "challenges"
+ Global.logger.info("Scraper: prebuild phase starting")
+ curriculum_dist_dir.mkdir(parents=True, exist_ok=True)
+ shutil.rmtree(curriculum_dist_dir)
+ challenges_dir = curriculum_raw_dir.joinpath(
+ "freeCodeCamp-main", "curriculum", "challenges"
- locales_dir = pathlib.Path(
- tmpdir, "curriculum", "freeCodeCamp-main", "client", "i18n", "locales", lang
+ locales_dir = curriculum_raw_dir.joinpath(
+ "freeCodeCamp-main", "client", "i18n", "locales", fcc_lang
# eg. ['basic-javascript', 'debugging']
- for course in course_list_str.split(","):
- print(f"Prebuilding {course}")
+ for course in course_csv.split(","):
+ Global.logger.debug(f"Prebuilding {course}")
meta = json.loads(
- curriculum_dir.joinpath("_meta", course, "meta.json").read_text()
+ challenges_dir.joinpath("_meta", course, "meta.json").read_text()
# Get the order that the challenges should be completed in for
ids = [
@@ -107,24 +113,24 @@ def prebuild_command(arguments):
superblock = meta["superBlock"]
- challenge_list: List[Challenge] = []
- for file in get_challenges_for_lang(curriculum_dir, lang):
+ challenge_list: list[Challenge] = []
+ for file in get_challenges_for_lang(challenges_dir, fcc_lang):
challenge = Challenge(file)
if challenge.course_superblock != superblock:
# The ID is the Challenge's UUID; only add it to the challenge list if it's
# a part of the course.
- if challenge.id() in ids:
+ if challenge.identifier() in ids:
- sorted(challenge_list, key=lambda x: ids.index(x.id())),
+ sorted(challenge_list, key=lambda x: ids.index(x.identifier())),
- outdir.joinpath("curriculum", lang),
+ curriculum_dist_dir.joinpath("curriculum", fcc_lang),
- print(f"Prebuilt {course}")
# Copy all the locales for this language
- write_locales_to_path(locales_dir, outdir, lang)
- print(f"Prebuilt curriculum into {outdir}")
+ write_locales_to_path(locales_dir, curriculum_dist_dir, fcc_lang)
+ Global.logger.info(f"Prebuilt curriculum into {curriculum_dist_dir}")
+ Global.logger.info("Scraper: prebuild phase finished")
diff --git a/scraper/src/fcc2zim/scraper.py b/scraper/src/fcc2zim/scraper.py
new file mode 100644
index 0000000..9430cef
--- /dev/null
+++ b/scraper/src/fcc2zim/scraper.py
@@ -0,0 +1,166 @@
+import datetime
+from pathlib import Path
+from zimscraperlib.zim import Creator
+from fcc2zim.build import build_command
+from fcc2zim.constants import FCC_LANG_MAP, VERSION, Global
+from fcc2zim.fetch import fetch_command
+from fcc2zim.prebuild import prebuild_command
+from fcc2zim.zimscraperlib_fork import compute_descriptions
+class Scraper:
+ def __init__(
+ self,
+ *,
+ do_fetch: bool,
+ do_prebuild: bool,
+ do_build: bool,
+ zimui_dist_dir: str,
+ output_dir: str,
+ build_dir: str,
+ language: str,
+ name: str,
+ title: str,
+ description: str,
+ long_description: str | None,
+ content_creator: str,
+ publisher: str,
+ zim_file: str | None,
+ force: bool,
+ course_csv: str,
+ zip_path: str | None,
+ start_date: datetime.date,
+ ):
+ self.creator = None
+ self.do_fetch = do_fetch
+ self.do_prebuild = do_prebuild
+ self.do_build = do_build
+ if not (self.do_fetch + self.do_prebuild + self.do_build):
+ self.do_fetch = self.do_prebuild = self.do_build = True
+ self.zimui_dist_dir = Path(zimui_dist_dir)
+ if not self.zimui_dist_dir.exists():
+ raise ValueError(f"zimui_dist_dir {self.zimui_dist_dir} does not exists")
+ self.output_dir = Path(output_dir)
+ self.build_dir = Path(build_dir)
+ self.curriculum_raw_dir = self.build_dir.joinpath("curriculum-raw")
+ self.curriculum_dist_dir = self.build_dir.joinpath("curriculum-dist")
+ # Make sure the output directory exists
+ self.output_dir.mkdir(parents=True, exist_ok=True)
+ self.build_dir.mkdir(parents=True, exist_ok=True)
+ self.language = language
+ if self.language not in FCC_LANG_MAP:
+ raise ValueError(f"Unsupported language {self.language}")
+ self.fcc_lang = FCC_LANG_MAP[language]
+ self.name = name
+ self.title = title
+ self.description = description
+ self.long_description = long_description
+ self.description, self.long_description = compute_descriptions(
+ self.description, self.description, self.long_description
+ )
+ self.content_creator = content_creator
+ self.publisher = publisher
+ self.force = force
+ self.course_csv = course_csv
+ if not zip_path:
+ self.zip_path = self.build_dir.joinpath("main.zip")
+ else:
+ self.zip_path = Path(zip_path)
+ if not self.zip_path.exists():
+ raise ValueError(f"Zip file not found in {self.zip_path}")
+ # if we do not build the ZIM, we can stop here
+ if not self.do_build:
+ return
+ period = start_date.strftime("%Y-%m")
+ if zim_file:
+ self.zim_path = Path(zim_file.format(period=period))
+ # make sure we were given a filename and not a path
+ if Path(self.zim_path.name) != self.zim_path:
+ raise ValueError(f"zim_name is not a filename: {zim_file}")
+ else:
+ self.zim_path = Path(f"{name}_{period}.zim")
+ # build full path
+ self.zim_path = self.output_dir.joinpath(self.zim_path)
+ if self.zim_path.exists():
+ if not self.force:
+ raise ValueError(f"ZIM file {self.zim_path} already exist.")
+ Global.logger.info(f"Removing existing ZIM file {self.zim_path}")
+ self.zim_path.unlink()
+ else:
+ Global.logger.info(f"ZIM path: {self.zim_path}")
+ logo_path = Path(__file__).parent.joinpath("assets", "fcc_48.png")
+ if not logo_path.exists():
+ raise ValueError(f"Logo not found at {logo_path}")
+ self.creator = Creator(self.zim_path, "index.html").config_metadata(
+ Name=self.name,
+ Title=self.title,
+ Publisher=self.publisher,
+ Date=start_date,
+ Creator=self.content_creator,
+ Description=self.description,
+ LongDescription=self.long_description,
+ Language=self.language,
+ Tags=";".join(["FCC", "freeCodeCamp"]),
+ Scraper=f"fcc2zim v{VERSION}",
+ Illustration_48x48_at_1=logo_path.read_bytes(),
+ )
+ # start creator early to detect any problem early as well
+ self.creator.start()
+ def run(self):
+ try:
+ self.run_commands()
+ except Exception as exc:
+ if self.creator:
+ self.creator.can_finish = False
+ if isinstance(exc, KeyboardInterrupt):
+ Global.logger.error("KeyboardInterrupt, exiting.")
+ raise SystemExit(3) from exc
+ else:
+ Global.logger.error(f"Interrupting process due to error: {exc}")
+ Global.logger.exception(exc)
+ raise SystemExit(2) from exc
+ else:
+ if self.creator:
+ self.creator.finish()
+ Global.logger.info(f"Finished creating Zim at {self.zim_path}")
+ def run_commands(self):
+ if self.do_fetch:
+ fetch_command(
+ force=self.force,
+ curriculum_raw_dir=self.curriculum_raw_dir,
+ zip_path=self.zip_path,
+ )
+ if self.do_prebuild:
+ prebuild_command(
+ fcc_lang=self.fcc_lang,
+ course_csv=self.course_csv,
+ curriculum_raw_dir=self.curriculum_raw_dir,
+ curriculum_dist_dir=self.curriculum_dist_dir,
+ )
+ if self.do_build:
+ build_command(
+ fcc_lang=self.fcc_lang,
+ creator=self.creator,
+ zimui_dist_dir=self.zimui_dist_dir,
+ curriculum_dist_dir=self.curriculum_dist_dir,
+ )
diff --git a/scraper/src/fcc2zim/zimscraperlib_fork.py b/scraper/src/fcc2zim/zimscraperlib_fork.py
new file mode 100644
index 0000000..0caa9d1
--- /dev/null
+++ b/scraper/src/fcc2zim/zimscraperlib_fork.py
@@ -0,0 +1,62 @@
+from zimscraperlib.constants import (
+from zimscraperlib.constants import (
+# This function will be released in zimscraperlib 3.1.2
+# Until then, it is forked here for convenience
+def compute_descriptions(
+ default_description: str,
+ user_description: str | None,
+ user_long_description: str | None,
+) -> tuple[str, str | None]:
+ """Computes short and long descriptions compliant with ZIM standard.
+ Based on provided parameters, the function computes a short and a long description
+ which are compliant with the ZIM standard (in terms of length).
+ User description(s) are used if set. They are checked to not exceed ZIM standard
+ maximum length; an error is raised otherwise; if ok, they are returned.
+ If user_description is not set, the description is computed based on the default
+ description, truncated if needed.
+ If user_long_description is not set and default description is too long for the
+ description field, the long_description is computed based on the default description
+ (truncated if needed), otherwise no long description is returned.
+ args:
+ default_description: the description which will be used if user descriptions
+ are not set (typically fetched online)
+ user_description: the description set by the user (typically set by a
+ CLI argument)
+ user_long_description: the long description set by the user (typically set by a
+ CLI argument)
+ Returns a tuple of (description, long_description)
+ """
+ if user_description and len(user_description) > MAX_DESC_LENGTH:
+ raise ValueError(
+ f"Description too long ({len(user_description)}>{MAX_DESC_LENGTH})"
+ )
+ if user_long_description and len(user_long_description) > MAX_LONG_DESC_LENGTH:
+ raise ValueError(
+ f"LongDescription too long ({len(user_long_description)}"
+ )
+ if not user_long_description and len(default_description) > MAX_DESC_LENGTH:
+ user_long_description = default_description[0:MAX_LONG_DESC_LENGTH]
+ if len(default_description) > MAX_LONG_DESC_LENGTH:
+ user_long_description = user_long_description[:-1] + "…"
+ if not user_description:
+ user_description = default_description[0:MAX_DESC_LENGTH]
+ if len(default_description) > MAX_DESC_LENGTH:
+ user_description = user_description[:-1] + "…"
+ return (user_description, user_long_description)
diff --git a/scraper/tasks.py b/scraper/tasks.py
new file mode 100644
index 0000000..90854e8
--- /dev/null
+++ b/scraper/tasks.py
@@ -0,0 +1,109 @@
+# pyright: strict, reportUntypedFunctionDecorator=false
+import os
+from invoke.context import Context
+from invoke.tasks import task # pyright: ignore [reportUnknownVariableType]
+use_pty = not os.getenv("CI", "")
+@task(optional=["args"], help={"args": "pytest additional arguments"})
+def test(ctx: Context, args: str = ""):
+ """run tests (without coverage)"""
+ ctx.run(f"pytest {args}", pty=use_pty)
+@task(optional=["args"], help={"args": "pytest additional arguments"})
+def test_cov(ctx: Context, args: str = ""):
+ """run tests with coverage"""
+ ctx.run(f"coverage run -m pytest {args}", pty=use_pty)
+@task(optional=["html"], help={"html": "flag to export html report"})
+def report_cov(ctx: Context, *, html: bool = False):
+ """report coverage"""
+ ctx.run("coverage combine", warn=True, pty=use_pty)
+ ctx.run("coverage report --show-missing", pty=use_pty)
+ if html:
+ ctx.run("coverage html", pty=use_pty)
+ optional=["args", "html"],
+ help={
+ "args": "pytest additional arguments",
+ "html": "flag to export html report",
+ },
+def coverage(ctx: Context, args: str = "", *, html: bool = False):
+ """run tests and report coverage"""
+ test_cov(ctx, args=args)
+ report_cov(ctx, html=html)
+@task(optional=["args"], help={"args": "black additional arguments"})
+def lint_black(ctx: Context, args: str = "."):
+ args = args or "." # needed for hatch script
+ ctx.run("black --version", pty=use_pty)
+ ctx.run(f"black --check --diff {args}", pty=use_pty)
+@task(optional=["args"], help={"args": "ruff additional arguments"})
+def lint_ruff(ctx: Context, args: str = "."):
+ args = args or "." # needed for hatch script
+ ctx.run("ruff --version", pty=use_pty)
+ ctx.run(f"ruff check {args}", pty=use_pty)
+ optional=["args"],
+ help={
+ "args": "linting tools (black, ruff) additional arguments, typically a path",
+ },
+def lintall(ctx: Context, args: str = "."):
+ """Check linting"""
+ args = args or "." # needed for hatch script
+ lint_black(ctx, args)
+ lint_ruff(ctx, args)
+@task(optional=["args"], help={"args": "check tools (pyright) additional arguments"})
+def check_pyright(ctx: Context, args: str = ""):
+ """check static types with pyright"""
+ ctx.run("pyright --version")
+ ctx.run(f"pyright {args}", pty=use_pty)
+@task(optional=["args"], help={"args": "check tools (pyright) additional arguments"})
+def checkall(ctx: Context, args: str = ""):
+ """check static types"""
+ check_pyright(ctx, args)
+@task(optional=["args"], help={"args": "black additional arguments"})
+def fix_black(ctx: Context, args: str = "."):
+ """fix black formatting"""
+ args = args or "." # needed for hatch script
+ ctx.run(f"black {args}", pty=use_pty)
+@task(optional=["args"], help={"args": "ruff additional arguments"})
+def fix_ruff(ctx: Context, args: str = "."):
+ """fix all ruff rules"""
+ args = args or "." # needed for hatch script
+ ctx.run(f"ruff --fix {args}", pty=use_pty)
+ optional=["args"],
+ help={
+ "args": "linting tools (black, ruff) additional arguments, typically a path",
+ },
+def fixall(ctx: Context, args: str = "."):
+ """Fix everything automatically"""
+ args = args or "." # needed for hatch script
+ fix_black(ctx, args)
+ fix_ruff(ctx, args)
+ lintall(ctx, args)
diff --git a/scraper/tests/test_dummy.py b/scraper/tests/test_dummy.py
new file mode 100644
index 0000000..ac33693
--- /dev/null
+++ b/scraper/tests/test_dummy.py
@@ -0,0 +1,7 @@
+from fcc2zim.constants import VERSION
+# dummy test just to check that everything is in place to add more tests / report
+# coverage
+def test_version():
+ assert VERSION and len(VERSION) > 0
diff --git a/scraper/tests/test_scraper.py b/scraper/tests/test_scraper.py
new file mode 100644
index 0000000..8800541
--- /dev/null
+++ b/scraper/tests/test_scraper.py
@@ -0,0 +1,235 @@
+import datetime
+from pathlib import Path
+from tempfile import NamedTemporaryFile, TemporaryDirectory
+import pytest
+from fcc2zim.scraper import Scraper
+DEFAULT_START_DATE = datetime.date.fromisoformat("2023-08-23")
+WORKING_DIR = TemporaryDirectory(prefix="fcc2zim_tests_")
+ZIMUI_DIST_PATH = WORKING_DIR_PATH.joinpath("zimui/dist")
+ZIMUI_DIST_PATH.mkdir(parents=True, exist_ok=True)
+BUILD_PATH = WORKING_DIR_PATH.joinpath("build")
+OUTPUT_PATH = WORKING_DIR_PATH.joinpath("output")
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor "
+ "incididunt ut labore et dolore magna aliqua. At erat pellentesque adipiscing "
+ "commodo elit at imperdiet. Rutrum tellus pellentesque eu tincidunt tortor aliquam"
+ " nulla facilisi. Eget lorem dolor sed viverra ipsum nunc. Ipsum nunc aliquet "
+ "bibendum enim facilisis gravida neque convallis. Aliquam malesuada bibendum arcu "
+ "vitae elementum curabitur. Platea dictumst quisque sagittis purus sit amet "
+ "volutpat. Blandit libero volutpat sed cras ornare. In eu mi bibendum neque "
+ "egestas. Egestas dui id ornare arcu odio. Pulvinar neque laoreet suspendisse "
+ "interdum. Fames ac turpis egestas integer eget aliquet nibh praesent tristique. Et"
+ " egestas quis ipsum suspendisse ultrices gravida dictum fusce. Malesuada fames ac "
+ "turpis egestas. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada "
+ "proin libero. In arcu cursus euismod quis viverra. Faucibus in ornare quam viverra"
+ ". Curabitur vitae nunc sed velit dignissim sodales ut eu sem. Velit scelerisque in"
+ " dictum non consectetur a erat nam. Proin fermentum leo vel orci porta non. Fames"
+ " ac turpis egestas sed tempus. Vitae justo eget magna fermentum iaculis eu non. "
+ "Imperdiet massa tincidunt nunc pulvinar sapien et ligula. Laoreet sit amet cursus "
+ "sit amet dictum sit amet. Quis hendrerit dolor magna eget. Orci ac auctor augue "
+ "mauris augue. Consequat interdum varius sit amet mattis. At ultrices mi tempus "
+ "imperdiet nulla malesuada pellentesque elit. Volutpat est velit egestas dui. "
+ "Potenti nullam ac tortor vitae. At tempor commodo ullamcorper a lacus vestibulum "
+ "sed arcu non. Duis ut diam quam nulla. Vestibulum mattis ullamcorper velit sed "
+ "ullamcorper. Sit amet commodo nulla facilisi nullam vehicula. Faucibus purus in "
+ "massa tempor nec feugiat. Sem fringilla ut morbi tincidunt augue interdum velit. "
+ "Etiam dignissim diam quis enim lobortis scelerisque fermentum dui. Nunc vel risus "
+ "commodo viverra maecenas accumsan. Aenean sed adipiscing diam donec adipiscing "
+ "tristique. Maecenas accumsan lacus vel facilisis volutpat est velit egestas. Nulla"
+ " aliquet porttitor lacus luctus accumsan tortor posuere ac. Habitant morbi "
+ "tristique senectus et netus et. Eget mi proin sed libero enim sed faucibus turpis "
+ "in. Vulputate enim nulla aliquet porttitor lacus. Dui ut ornare lectus sit amet "
+ "est. Quam lacus suspendisse faucibus interdum posuere. Sagittis orci a scelerisque"
+ " purus semper eget duis at tellus. Tellus molestie nunc non blandit massa. Feugiat"
+ " vivamus at augue eget arcu dictum varius duis at. Varius morbi enim nunc faucibus"
+ " a pellentesque sit. Id aliquet lectus proin nibh nisl condimentum id venenatis a."
+ " Tortor dignissim convallis aenean et tortor at risus viverra adipiscing. Aliquam "
+ "malesuada bibendum arcu vitae elementum curabitur vitae nunc sed. Habitasse platea"
+ " dictumst quisque sagittis purus sit amet volutpat. Vitae auctor eu augue ut "
+ "lectus. At varius vel pharetra vel turpis nunc eget. Dictum at tempor commodo "
+ "ullamcorper a lacus vestibulum sed arcu. Pellentesque massa placerat duis "
+ "ultricies. Enim nunc faucibus a pellentesque sit amet porttitor eget dolor. "
+ "Volutpat blandit aliquam etiam erat velit scelerisque in. Amet mattis vulputate "
+ "enim nulla aliquet porttitor. Egestas maecenas pharetra convallis posuere morbi "
+ "leo urna molestie. Duis ut diam quam nulla porttitor massa id. In fermentum "
+ "posuere urna nec tincidunt praesent. Turpis egestas sed tempus urna et pharetra "
+ "pharetra massa. Tellus molestie nunc non blandit massa. Diam phasellus vestibulum "
+ "lorem sed risus ultricies. Egestas erat imperdiet sed euismod nisi porta lorem. "
+ "Quam viverra orci sagittis eu volutpat odio facilisis mauris sit. Ornare aenean "
+ "euismod elementum nisi quis. Laoreet non curabitur gravida arcu ac tortor "
+ "dignissim convallis aenean. Sagittis aliquam malesuada bibendum arcu vitae "
+ "elementum. Sed blandit libero volutpat sed cras ornare. Sagittis eu volutpat odio "
+ "facilisis mauris. Facilisis volutpat est velit egestas dui id ornare arcu odio. "
+ "Eu feugiat pretium nibh."
+class TestScraper:
+ def create_scraper(
+ self,
+ *,
+ do_fetch: bool = True,
+ do_prebuild: bool = True,
+ do_build: bool = True,
+ zimui_dist_dir: str = str(ZIMUI_DIST_PATH),
+ output_dir: str = str(OUTPUT_PATH),
+ build_dir: str = str(BUILD_PATH),
+ language: str = "eng",
+ name="fcc_en_javascript",
+ title="freeCodeCamp Javascript",
+ description="FCC Javascript Courses",
+ long_description: str | None = None,
+ content_creator: str = "freeCodeCamp",
+ publisher="openZIM",
+ zim_file: str | None = None,
+ force: bool = False,
+ course_csv="regular-expressions,basic-javascript",
+ zip_path: str | None = None,
+ start_date: datetime.date = DEFAULT_START_DATE,
+ ):
+ return Scraper(
+ do_fetch=do_fetch,
+ do_prebuild=do_prebuild,
+ do_build=do_build,
+ zimui_dist_dir=zimui_dist_dir,
+ output_dir=output_dir,
+ build_dir=build_dir,
+ language=language,
+ name=name,
+ title=title,
+ description=description,
+ long_description=long_description,
+ content_creator=content_creator,
+ publisher=publisher,
+ zim_file=zim_file,
+ force=force,
+ course_csv=course_csv,
+ zip_path=zip_path,
+ start_date=start_date,
+ )
+ def test_init_ok(self):
+ assert not OUTPUT_PATH.exists()
+ assert not BUILD_PATH.exists()
+ self.create_scraper()
+ assert OUTPUT_PATH.exists()
+ assert BUILD_PATH.exists()
+ @pytest.mark.parametrize(
+ "do_fetch, do_prebuild, do_build, expected_do_fetch, expected_do_prebuild,"
+ "expected_do_build",
+ [
+ pytest.param(False, False, False, True, True, True, id="FFF"),
+ pytest.param(True, False, False, True, False, False, id="TFF"),
+ pytest.param(False, True, False, False, True, False, id="FTF"),
+ pytest.param(True, True, False, True, True, False, id="TTF"),
+ pytest.param(False, False, True, False, False, True, id="FFT"),
+ pytest.param(True, False, True, True, False, True, id="TFT"),
+ pytest.param(False, True, True, False, True, True, id="FTT"),
+ pytest.param(True, True, True, True, True, True, id="TTT"),
+ ],
+ )
+ def test_do_phases_ok(
+ self,
+ *,
+ do_fetch: bool,
+ do_prebuild: bool,
+ do_build: bool,
+ expected_do_fetch: bool,
+ expected_do_prebuild: bool,
+ expected_do_build: bool,
+ ):
+ scraper = self.create_scraper(
+ do_fetch=do_fetch, do_prebuild=do_prebuild, do_build=do_build
+ )
+ assert scraper.do_fetch == expected_do_fetch
+ assert scraper.do_prebuild == expected_do_prebuild
+ assert scraper.do_build == expected_do_build
+ def test_zimui_dist_dir_ko(self):
+ with pytest.raises(ValueError):
+ self.create_scraper(zimui_dist_dir="whatever")
+ @pytest.mark.parametrize(
+ "language, expected_fcc_lang",
+ [
+ pytest.param("eng", "english", id="english"),
+ pytest.param("eng", "english", id="english"),
+ pytest.param("ara", "arabic", id="arabic"),
+ pytest.param("cmn", "chinese", id="chinese"),
+ pytest.param("lzh", "chinese-traditional", id="chinese-traditional"),
+ pytest.param("eng", "english", id="english"),
+ pytest.param("spa", "espanol", id="espanol"),
+ pytest.param("deu", "german", id="german"),
+ pytest.param("ita", "italian", id="italian"),
+ pytest.param("jpn", "japanese", id="japanese"),
+ pytest.param("por", "portuguese", id="portuguese"),
+ pytest.param("ukr", "ukranian", id="ukranian"),
+ ],
+ )
+ def test_fcc_lang_ok(self, language: str, expected_fcc_lang: str):
+ scraper = self.create_scraper(language=language)
+ assert scraper.language == language
+ assert scraper.fcc_lang == expected_fcc_lang
+ def test_language_ko(self):
+ with pytest.raises(ValueError):
+ self.create_scraper(language="whatever")
+ def test_description_ko(self):
+ with pytest.raises(ValueError):
+ self.create_scraper(description=LONG_TEXT[:81])
+ def test_long_description_ko(self):
+ with pytest.raises(ValueError):
+ self.create_scraper(long_description=LONG_TEXT[:4001])
+ def test_title_ko(self):
+ with pytest.raises(ValueError):
+ self.create_scraper(title=LONG_TEXT[:31])
+ def test_zip_path_ok(self):
+ with NamedTemporaryFile(dir=WORKING_DIR_PATH) as tmp:
+ zip_path = tmp.name
+ self.create_scraper(zip_path=zip_path)
+ def test_zip_path_ko(self):
+ with pytest.raises(ValueError):
+ self.create_scraper(zip_path="whatever")
+ @pytest.mark.parametrize(
+ "name, start_date",
+ [
+ pytest.param("something", "2023-08-23", id="case1"),
+ pytest.param("name2", "2023-08-24", id="case2"),
+ ],
+ )
+ def test_zim_file_default(self, name, start_date):
+ scraper = self.create_scraper(
+ name=name, start_date=datetime.date.fromisoformat(start_date)
+ )
+ assert scraper.zim_path == OUTPUT_PATH.joinpath(f"{name}_{start_date[:7]}.zim")
+ def test_zim_file_is_path_ko(self):
+ with pytest.raises(ValueError):
+ self.create_scraper(zim_file=str(OUTPUT_PATH.joinpath("whatever.zim")))
+ def test_zim_file_ok(self):
+ self.create_scraper(zim_file="whatever.zim")
+ def test_zim_file_exists_ko(self):
+ with NamedTemporaryFile(dir=OUTPUT_PATH, suffix=".zim") as tmp:
+ zim_file = Path(tmp.name).name
+ with pytest.raises(ValueError):
+ self.create_scraper(zim_file=zim_file)
+ def test_zim_file_exists_force(self):
+ with NamedTemporaryFile(dir=OUTPUT_PATH, suffix=".zim", delete=False) as tmp:
+ zim_file = Path(tmp.name).name
+ self.create_scraper(zim_file=zim_file, force=True)
+ assert not Path(tmp.name).exists()
