diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
deleted file mode 100644
index bbc8644..0000000
--- a/.devcontainer/devcontainer.json
+++ /dev/null
@@ -1,25 +0,0 @@
-// For format details, see https://aka.ms/devcontainer.json. For config options, see the
-// README at: https://github.com/devcontainers/templates/tree/main/src/typescript-node
-{
- "name": "Node.js & TypeScript",
- // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
- "image": "mcr.microsoft.com/devcontainers/typescript-node:0-20",
- "features": {
- "ghcr.io/devcontainers/features/python:1": {}
- }
-
- // Features to add to the dev container. More info: https://containers.dev/features.
- // "features": {},
-
- // Use 'forwardPorts' to make a list of ports inside the container available locally.
- // "forwardPorts": [],
-
- // Use 'postCreateCommand' to run commands after the container is created.
- // "postCreateCommand": "yarn install",
-
- // Configure tool-specific properties.
- // "customizations": {},
-
- // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
- // "remoteUser": "root"
-}
diff --git a/.github/workflows/Publish.yaml b/.github/workflows/Publish.yaml
new file mode 100644
index 0000000..e128308
--- /dev/null
+++ b/.github/workflows/Publish.yaml
@@ -0,0 +1,45 @@
+name: Build and upload to PyPI
+
+on:
+ release:
+ types: [published]
+
+jobs:
+ publish:
+ runs-on: ubuntu-22.04
+ permissions:
+ id-token: write # mandatory for PyPI trusted publishing
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version-file: scraper/pyproject.toml
+ architecture: x64
+
+ - name: Build packages
+ working-directory: scraper
+ run: |
+ pip install -U pip build
+ python -m build --sdist --wheel
+
+ - name: Upload to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1.8
+ with:
+ packages-dir: scraper/dist/
+
+ - name: Build and push Docker image
+ uses: openzim/docker-publish-action@v10
+ with:
+ image-name: openzim/freecodecamp
+ tag-pattern: /^v([0-9.]+)$/
+ latest-on-tag: true
+ restrict-to: openzim/freecodecamp
+ registries: ghcr.io
+ credentials:
+ GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }}
+ GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }}
+ repo_description: auto
+ repo_overview: auto
diff --git a/.github/workflows/docker.yaml b/.github/workflows/PublishDockerDevImage.yaml
similarity index 61%
rename from .github/workflows/docker.yaml
rename to .github/workflows/PublishDockerDevImage.yaml
index 9d086e9..0026a6f 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/PublishDockerDevImage.yaml
@@ -1,29 +1,27 @@
-name: Docker
+name: Publish Docker dev image
on:
push:
branches:
- main
- tags:
- - v*
jobs:
- build-and-push:
- name: Deploy Docker Image
+ publish:
runs-on: ubuntu-22.04
+
steps:
- - uses: actions/checkout@v3.4.0
- - name: Build and push
+ - uses: actions/checkout@v3
+
+ - name: Build and push Docker image
uses: openzim/docker-publish-action@v10
with:
image-name: openzim/freecodecamp
- on-master: dev
- tag-pattern: /^v([0-9.]+)$/
- latest-on-tag: true
+ manual-tag: dev
+ latest-on-tag: false
restrict-to: openzim/freecodecamp
registries: ghcr.io
credentials:
GHCRIO_USERNAME=${{ secrets.GHCR_USERNAME }}
GHCRIO_TOKEN=${{ secrets.GHCR_TOKEN }}
repo_description: auto
- repo_overview: auto
\ No newline at end of file
+ repo_overview: auto
diff --git a/.github/workflows/Tests.yaml b/.github/workflows/Tests.yaml
new file mode 100644
index 0000000..e6d5e3b
--- /dev/null
+++ b/.github/workflows/Tests.yaml
@@ -0,0 +1,66 @@
+name: Tests
+
+on:
+ pull_request:
+ push:
+ branches:
+ - main
+
+jobs:
+ run-tests:
+ runs-on: ubuntu-22.04
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version-file: scraper/pyproject.toml
+ architecture: x64
+
+ - name: Install dependencies (and project)
+ working-directory: scraper
+ run: |
+ pip install -U pip
+ pip install -e .[test,scripts]
+
+ - name: Run the tests
+ working-directory: scraper
+ run: inv coverage --args "-vvv"
+
+ - name: Upload coverage report to codecov
+ uses: codecov/codecov-action@v3
+ with:
+ directory: scraper # tests (and their coverage output) run with working-directory: scraper
+ token: ${{ secrets.CODECOV_TOKEN }}
+
+ build_python:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version-file: scraper/pyproject.toml
+ architecture: x64
+
+ - name: Ensure we can build Python targets
+ working-directory: scraper
+ run: |
+ pip install -U pip build
+ python3 -m build --sdist --wheel
+
+ build_docker:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Ensure we can build the Docker image
+ run: |
+ docker build -t testimage .
+
+ - name: Ensure we can start the Docker image
+ run: |
+ docker run --rm testimage --version
diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml
deleted file mode 100644
index ddfcfe0..0000000
--- a/.github/workflows/integration.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-name: Build and test the freeCodeCamp zim file
-
-on:
- pull_request:
- push:
- branches:
- - 'main'
-jobs:
- integration-test:
- runs-on: ubuntu-latest
- name: Build latest zim file
- steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-node@v3
- with:
- node-version: 20
- cache: 'yarn'
- cache-dependency-path: '**/yarn.lock'
- - run: yarn install --frozen-lockfile
- working-directory: client
- - run: yarn lint
- working-directory: client
- - run: yarn build
- working-directory: client
-
- - uses: actions/setup-python@v4
- with:
- python-version: '3.11'
- cache: 'pip' # caching pip dependencies
-
- - run: make setup
- - run: make fetch
- - run: make prebuild
-
- # Full integration test using the fetch/prebuild markdown as a fixture
- - run: yarn test --run
- working-directory: client
-
- - run: make zim
\ No newline at end of file
diff --git a/.github/workflows/qa.yaml b/.github/workflows/qa.yaml
index 0c54d7d..75b0e7a 100644
--- a/.github/workflows/qa.yaml
+++ b/.github/workflows/qa.yaml
@@ -1,39 +1,38 @@
name: QA
on:
- push:
- branches: main
pull_request:
-
-env:
- MAX_LINE_LENGTH: 88
+ push:
+ branches:
+ - main
jobs:
check-qa:
- runs-on: ubuntu-20.04
+ runs-on: ubuntu-22.04
+
steps:
- - uses: actions/checkout@v2
- - uses: actions/setup-python@v4
- with:
- python-version: '3.11'
- cache: 'pip' # caching pip dependencies
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version-file: scraper/pyproject.toml
+ architecture: x64
- - name: Install lint requirements
+ - name: Install dependencies (and project)
+ working-directory: scraper
run: |
- pip install -r openzim/lint_requirements.txt
+ pip install -U pip
+ pip install -e .[lint,scripts,test,check]
- name: Check black formatting
- run: |
- black --version
- black --check .
+ working-directory: scraper
+ run: inv lint-black
- - name: Check flake8 linting
- run: |
- flake8 --version
- flake8 . --count --max-line-length=$MAX_LINE_LENGTH --statistics
+ - name: Check ruff
+ working-directory: scraper
+ run: inv lint-ruff
- - name: Check import order with isort
- run: |
- isort --version
- isort --profile black --check .
\ No newline at end of file
+ - name: Check pyright
+ working-directory: scraper
+ run: inv check-pyright
diff --git a/.gitignore b/.gitignore
index 52bb2fe..586dd9f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,18 +1,374 @@
-# Logs
-logs
+# Created by https://www.toptal.com/developers/gitignore/api/linux,macos,python,database,visualstudiocode,intellij
+# Edit at https://www.toptal.com/developers/gitignore?templates=linux,macos,python,database,visualstudiocode,intellij
+
+### Database ###
+*.accdb
+*.db
+*.dbf
+*.mdb
+*.pdb
+*.sqlite3
+*.db-shm
+*.db-wal
+
+### Intellij ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# AWS User-specific
+.idea/**/aws.xml
+
+# Generated files
+.idea/**/contentModel.xml
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn. Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# SonarLint plugin
+.idea/sonarlint/
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+### Intellij Patch ###
+# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
+
+# *.iml
+# modules.xml
+# .idea/misc.xml
+# *.ipr
+
+# Sonarlint plugin
+# https://plugins.jetbrains.com/plugin/7973-sonarlint
+.idea/**/sonarlint/
+
+# SonarQube Plugin
+# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
+.idea/**/sonarIssues.xml
+
+# Markdown Navigator plugin
+# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
+.idea/**/markdown-navigator.xml
+.idea/**/markdown-navigator-enh.xml
+.idea/**/markdown-navigator/
+
+# Cache file creation bug
+# See https://youtrack.jetbrains.com/issue/JBR-2257
+.idea/$CACHE_FILE$
+
+# CodeStream plugin
+# https://plugins.jetbrains.com/plugin/12206-codestream
+.idea/codestream.xml
+
+# Azure Toolkit for IntelliJ plugin
+# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
+.idea/**/azureSettings.xml
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
-# Editor directories and files
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# LSP config files
+pyrightconfig.json
+
+### VisualStudioCode ###
.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
!.vscode/extensions.json
-.idea
-.DS_Store
-*.suo
-*.ntvs*
-*.njsproj
-*.sln
-*.sw?
+!.vscode/*.code-snippets
+
+# Local History for Visual Studio Code
+.history/
+# Built Visual Studio Code Extensions
+*.vsix
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+
+# End of https://www.toptal.com/developers/gitignore/api/linux,macos,python,database,visualstudiocode,intellij
+
+# local dirs used for working files
tmp
build
-*.zim
\ No newline at end of file
+output
+
+# ignore all vscode, this configuration is not maintained
+.vscode
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..24463f5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,27 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.4.0
+ hooks:
+ - id: trailing-whitespace
+ - id: end-of-file-fixer
+- repo: https://github.com/psf/black
+ rev: "23.7.0"
+ hooks:
+ - id: black
+- repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.0.285
+ hooks:
+ - id: ruff
+- repo: https://github.com/RobertCraigie/pyright-python
+ rev: v1.1.323
+ hooks:
+ - id: pyright
+ name: pyright (system)
+ description: 'pyright static type checker'
+ entry: pyright
+ language: system
+ 'types_or': [python, pyi]
+ require_serial: true
+ minimum_pre_commit_version: '2.9.2'
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..8bc3161
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,11 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+- Initial version, supporting only Javascript challenges
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..5cfc18d
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,45 @@
+# Contributing
+
+This project adheres to openZIM's [Contribution Guidelines](https://github.com/openzim/overview/wiki/Contributing)
+and openZIM's [Bootstrap conventions](https://github.com/openzim/_python-bootstrap/wiki/) especially its
+[Policy](https://github.com/openzim/_python-bootstrap/wiki/Policy).
+
+## Guidelines
+
+- Don't take assigned issues. Comment if those go stale.
+- If your contribution is far from trivial, open an issue to discuss it first.
+- Ensure your code passes `inv lintall` and `inv checkall`
+
+## Configure your environment
+
+The development environment is meant to be managed by `hatch`, and commits can be checked with `pre-commit`.
+
+If not already installed on your machine, install them in your global environment:
+
+```
+pip install -U hatch pre-commit
+```
+
+Install the pre-commit hooks:
+
+```
+pre-commit install
+```
+
+Go to scraper directory:
+
+```
+cd scraper
+```
+
+Start a hatch shell to run further commands:
+
+```
+hatch shell
+```
+
+Install/Update dependencies:
+
+```sh
+pip install -U ".[dev]"
+```
diff --git a/Dockerfile b/Dockerfile
index dfb1704..fae6752 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,19 +1,33 @@
-FROM mcr.microsoft.com/devcontainers/typescript-node:20 as client
+FROM node:20-alpine AS zimui
WORKDIR /src
-COPY client /src
+COPY zimui /src
RUN yarn install --frozen-lockfile
RUN yarn build
-FROM python:3.11-buster
+FROM python:3.11.4-bookworm
+LABEL org.opencontainers.image.source=https://github.com/openzim/freecodecamp
-WORKDIR /src
-COPY openzim/requirements.txt /src
-RUN pip install -r requirements.txt --no-cache-dir
+RUN python -m pip install --no-cache-dir -U \
+ pip
+
+# Copy code + associated artifacts + zimui build output
+COPY LICENSE LICENSE.fcc.md README.md /src/
+COPY scraper/pyproject.toml scraper/tasks.py /src/scraper/
+COPY scraper/src /src/scraper/src
+COPY --from=zimui /src/dist /src/zimui
+
+# Install + cleanup
+RUN pip install --no-cache-dir /src/scraper \
+ && rm -rf /src/scraper
-COPY openzim /src
-COPY --from=client /src /src/client
+# default output directory
+RUN mkdir -p /output
+WORKDIR /output
+ENV BUILD_DIR=/tmp
+ENV OUTPUT_DIR=/output
+ENV ZIMUI_DIST_DIR=/src/zimui
-ENTRYPOINT ["python3", "fcc2zim"]
\ No newline at end of file
+ENTRYPOINT ["fcc2zim"]
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 873b19d..0000000
--- a/Makefile
+++ /dev/null
@@ -1,70 +0,0 @@
-SHELL=/bin/bash
-
-
-COURSES = regular-expressions \
- basic-javascript \
- basic-data-structures \
- debugging \
- functional-programming \
- object-oriented-programming \
- basic-algorithm-scripting \
- intermediate-algorithm-scripting \
- javascript-algorithms-and-data-structures-projects
-
-COURSE_CSV=$(shell sed -r 's/[[:space:]]/,/g' <<< "${COURSES}")
-
-TITLE="freeCodeCamp Javascript"
-NAME="fcc_en_javascript"
-DESCRIPTION="FCC Javascript Courses"
-LANG=eng
-CLIENTDIR=./client/dist
-TMPDIR=./tmp
-OUTPATH=./build/${LANG}.zim
-MAX_LINE_LENGTH = 88
-
-.PHONY: all setup clean client build
-
-clean:
- rm -rf client/dist/fcc
- rm -rf client/public/fcc
- rm -rf ${TMPDIR}/curriculum
- rm -rf build
-
-setup:
- cd openzim && \
- pip install -r requirements.txt \
- pip install -r lint_requirements.txt
-
-lint:
- cd openzim
- black .
- flake8 . --count --max-line-length=${MAX_LINE_LENGTH} --statistics
- isort --profile black .
-
-fetch:
- python3 openzim/fcc2zim fetch --tmpdir=${TMPDIR}
-
-prebuild:
- python3 openzim/fcc2zim prebuild --course=${COURSE_CSV} --outdir=./client/dist/fcc --language ${LANG} --tmpdir=${TMPDIR}
-
-zim:
- python3 openzim/fcc2zim zim --clientdir ${CLIENTDIR} --outzim ${OUTPATH} \
- --language ${LANG} --name ${NAME} --title ${TITLE} --description ${DESCRIPTION}
-
-all: clean fetch prebuild zim
-
-build:
- python3 openzim/fcc2zim all --clientdir ${CLIENTDIR} --outdir=./client/dist/fcc --outzim ${OUTPATH} \
- --language ${LANG} --tmpdir=${TMPDIR} --course=${COURSE_CSV} \
- --name ${NAME} --title ${TITLE} --description ${DESCRIPTION}
-
-docker_build:
- docker build . -t openzim/fcc2zim
-
-docker_run:
- docker run --rm -it -v $(PWD)/tmp:/tmp/fcc2zim openzim/fcc2zim all --clientdir ${CLIENTDIR} --outdir=./client/dist/fcc --outzim ${OUTPATH} \
- --language ${LANG} --tmpdir=/tmp/fcc2zim --course=${COURSE_CSV} \
- --name ${NAME} --title ${TITLE} --description ${DESCRIPTION}
-
-docker_debug:
- docker run --rm -it --entrypoint=/bin/bash openzim/fcc2zim
\ No newline at end of file
diff --git a/README.md b/README.md
index 79fe5b9..7f5a919 100644
--- a/README.md
+++ b/README.md
@@ -1,52 +1,66 @@
-# FCC on Zim
-
+# freeCodeCamp scraper
+
+This scraper downloads selected [freeCodeCamp](https://www.freecodecamp.org/) courses and puts them in a
+[ZIM](https://openzim.org) file, a clean and user-friendly format for storing content for offline usage.
+
+[![CodeFactor](https://www.codefactor.io/repository/github/openzim/freecodecamp/badge)](https://www.codefactor.io/repository/github/openzim/freecodecamp)
+[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
+[![codecov](https://codecov.io/gh/openzim/freecodecamp/branch/main/graph/badge.svg)](https://codecov.io/gh/openzim/freecodecamp)
+[![PyPI version shields.io](https://img.shields.io/pypi/v/fcc2zim.svg)](https://pypi.org/project/fcc2zim/)
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/fcc2zim.svg)](https://pypi.org/project/fcc2zim/)
+[![Docker](https://ghcr-badge.deta.dev/openzim/freecodecamp/latest_tag?label=docker)](https://ghcr.io/openzim/freecodecamp)
+## Architecture
This project consists of two major components:
-- Openzim - The scripts (python) that fetch the latest FCC curriculum and package it into a format client can read, as well as our zim builder
-- Client - A vite app configured to be consumed by a Zim reader.
+- `zimui` - A Vue.js application specially crafted to:
+ - be embedded inside the ZIM and serve as main entry point (through compilation for offline usage with Vite)
+ - present FCC curriculum, including solving exercises
+ - be compatible with most ZIM readers
+- `scraper` - The Python tool that builds the FCC ZIM. It is responsible for:
+ - fetching the FCC curriculum and packaging it into a format the client can read
+ - embedding the `zimui` client and building the ZIM
-## freeCodeCamp Zim build process
+## Dependencies
-This process can be broken down into 5 parts
+Aside from Node.js and Python dependencies, which are managed, other binary dependencies come from the Python [zimscraperlib](https://github.com/openzim/python-scraperlib/) library.
-1. Build the Vite client
-1. Fetch the latest curriculum from freeCodeCamp by downloading the latest release source archive
-1. "Prebuild" the curriculum for a selected langauge and set of courses. Copy to the client directory
-1. Build a Zim file of the resulting Vite application.
## Development
-#### Prerequsites
-
-- Node 20.x
-- Python 3
-
-This project comes with .devcontainer to help onboard new developers, with Node 20 and Python3 installed
+See [CONTRIBUTING.md](CONTRIBUTING.md).
-See: [`Makefile`](Makefile) for a full build process
+### Prerequisites
-## Building with Docker
+- Node 20.x
+- Python 3.11
-- `docker build -t openzim/fcc2zim .`
-- `docker run --rm -it -v /workspaces/openzim-freecodecamp/tmp:/tmp/fcc2zim openzim/fcc2zim all \
- --clientdir ./client/dist --outdir=./client/dist/fcc --outzim ./build/eng.zim \
- --language eng --tmpdir=/tmp/fcc2zim \
- --course=regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects \
- --name "fcc_en_javascript" --title "freeCodeCamp Javascript" --description "FCC Javascript Courses"
-`
+### Running scraper locally
-## Course Options and Limitations
+You have to:
+- build the `zimui` frontend which will be embedded inside the ZIM (and redo it every time you make modifications to the `zimui`)
+- run the `scraper` to retrieve FCC curriculum and build the ZIM
-Currently this scraper only supports Javascript challenges. A list of courses is passed to the `prebuild` step as a comma seperated list of 'course slugs'.
+Sample commands:
+```
+cd zimui
+yarn install
+yarn build
+cd ../scraper
+hatch run fcc2zim --language eng --course "regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects" --name "fcc_en_javascript" --title "freeCodeCamp Javascript" --description "FCC Javascript Courses"
+```
-You can find a list of course slugs in the [freeCodeCamp curriculum folder](https://github.com/freeCodeCamp/freeCodeCamp/tree/main/curriculum/challenges/english/02-javascript-algorithms-and-data-structures)
+### Running scraper with Docker
-Example:
+Run from the official version (published on GHCR.io); the ZIM will be available in the `output` sub-folder of the current working directory.
```
-python3 openzim/fcc2zim prebuild --course=regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects --outdir=./client/dist/fcc --language eng --tmpdir=./tmp
+docker run --rm -it -v $(pwd)/output:/output ghcr.io/openzim/freecodecamp:latest --language eng --course "regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects" --name "fcc_en_javascript" --title "freeCodeCamp Javascript" --description "FCC Javascript Courses"
```
-# License
+## Course Options and Limitations
+
+Currently this scraper only supports JavaScript challenges. A list of courses is passed to the scraper as a comma-separated list of 'course slugs'.
+
+You can find a list of course slugs in the [freeCodeCamp curriculum folder](https://github.com/freeCodeCamp/freeCodeCamp/tree/main/curriculum/challenges/english/02-javascript-algorithms-and-data-structures)
-This repository is licensed under GPLv3, with the exception of the freeCodeCamp curriculum which is licensed under BSD 3 Clause (see LICENSE.fcc.md).
+In the Docker example above, see the `--course` argument: `regular-expressions,basic-javascript,basic-data-structures,debugging,functional-programming,object-oriented-programming,basic-algorithm-scripting,intermediate-algorithm-scripting,javascript-algorithms-and-data-structures-projects`
diff --git a/openzim/.gitignore b/openzim/.gitignore
deleted file mode 100644
index 1929d0f..0000000
--- a/openzim/.gitignore
+++ /dev/null
@@ -1,131 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-/tmp
\ No newline at end of file
diff --git a/openzim/README.md b/openzim/README.md
deleted file mode 100644
index 90d294b..0000000
--- a/openzim/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# freecodecamp
-FreeCodeCamp.org scraper (to ZIM)
diff --git a/openzim/fcc2zim b/openzim/fcc2zim
deleted file mode 100755
index 3cc3068..0000000
--- a/openzim/fcc2zim
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# vim: ai ts=4 sts=4 et sw=4 nu
-import argparse
-
-from fcctozim import VERSION, logger
-from fcctozim.build import build
-from fcctozim.fetch import fetch_command
-from fcctozim.prebuild import prebuild_command
-
-
-def main(args):
- command = args.command
-
- if command == 'fetch':
- fetch_command(args)
- elif command == 'prebuild':
- prebuild_command(args)
- elif command == 'zim':
- build(args)
- elif command == 'all':
- fetch_command(args)
- prebuild_command(args)
- build(args)
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(prog='fcc2zim')
-
-
- # create sub-parser
- sub_parsers = parser.add_subparsers(help='sub-command help', dest='command')
-
- fetch_cmd = sub_parsers.add_parser('fetch', help='fetch the latest curriculum')
- fetch_cmd.add_argument('--tmpdir', type=str, help='the temporary directory to hold the curriculum')
- fetch_cmd.add_argument('--force', type=bool, help='force a re-download of the curriculum zip')
-
- prebuild_cmd = sub_parsers.add_parser('prebuild', help='prebuild curriculum for Vite frontend')
- prebuild_cmd.add_argument('--language', type=str, help='Curriculum language')
- prebuild_cmd.add_argument('--course', type=str, help='Course or course list (seperated by commas)', required=True)
- prebuild_cmd.add_argument('--outdir', type=str, help='output directory (typically vite\'s dist folder)', required=True)
- prebuild_cmd.add_argument('--tmpdir', type=str, help='the temporary directory to hold the curriculum')
-
- zim_cmd = sub_parsers.add_parser('zim', help='package up the zim file')
- zim_cmd.add_argument('--clientdir', type=str, help='the directory containing our Vite application', required=True)
- zim_cmd.add_argument('--outzim', type=str, help='the path of our output file')
- zim_cmd.add_argument('--language', type=str, help='Language of zim file', required=True)
- zim_cmd.add_argument('--title', type=str, help='Title of zim file', required=True)
- zim_cmd.add_argument('--name', type=str, help='Name of zim file', required=True)
- zim_cmd.add_argument('--description', type=str, help='Description of zim file', required=True)
- zim_cmd.add_argument('--creator', type=str, help='Creator of the zim files content', required=False)
- zim_cmd.add_argument('--publisher', type=str, help='Publisher of the zim file', required=False)
-
- all_cmd = sub_parsers.add_parser('all', help='fetch, build and package up a zim file')
- all_cmd.add_argument('--force', type=bool, help='force a re-download of the curriculum zip')
- all_cmd.add_argument('--course', type=str, help='Course or course list (seperated by commas)', required=True)
- all_cmd.add_argument('--outdir', type=str, help='output directory (typically vite\'s dist folder)', required=True)
- all_cmd.add_argument('--outzim', type=str, help='path of the zim file to output', required=True)
- all_cmd.add_argument('--tmpdir', type=str, help='the temporary directory to hold the curriculum')
- all_cmd.add_argument('--clientdir', type=str, help='the directory containing our Vite application', required=True)
- all_cmd.add_argument('--outpath', type=str, help='the path of our output file')
- all_cmd.add_argument('--language', type=str, help='Language of zim file and curriculum', required=True)
- all_cmd.add_argument('--title', type=str, help='Title of zim file', required=True)
- all_cmd.add_argument('--name', type=str, help='Name of zim file', required=True)
- all_cmd.add_argument('--description', type=str, help='Description of zim file', required=True)
- all_cmd.add_argument('--creator', type=str, help='Creator of the zim files content', required=False)
- all_cmd.add_argument('--publisher', type=str, help='Publisher of the zim file', required=False)
-
- args = parser.parse_args()
- if args.command:
- main(args)
- else:
- parser.print_usage()
diff --git a/openzim/fcctozim/__init__.py b/openzim/fcctozim/__init__.py
deleted file mode 100644
index b94ae4e..0000000
--- a/openzim/fcctozim/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# vim: ai ts=4 sts=4 et sw=4 nu
-
-import logging
-import threading
-
-logger = logging.getLogger(__name__)
-
-VERSION = "1.0.0"
-
-FCC_LANG_MAP = {
- "ara": "arabic",
- "cmn": "chinese",
- "lzh": "chinese-traditional",
- "eng": "english",
- "spa": "espanol",
- "deu": "german",
- "ita": "italian",
- "jpn": "japanese",
- "por": "portuguese",
- "ukr": "ukranian",
-}
-
-lock = threading.Lock()
-
-creator = None
diff --git a/openzim/fcctozim/build.py b/openzim/fcctozim/build.py
deleted file mode 100644
index 77beeb8..0000000
--- a/openzim/fcctozim/build.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import json
-import pathlib
-from collections import OrderedDict
-from datetime import datetime
-
-from fcctozim import FCC_LANG_MAP, VERSION, logger
-from zimscraperlib.zim import Creator
-
-logo_path = pathlib.Path(__file__).parent.parent.joinpath("fcc_48.png")
-
-
-def build_curriculum_redirects(clientdir, language):
- fcc_lang = FCC_LANG_MAP[language]
- index_json_path = pathlib.Path(
- clientdir, "fcc", "curriculum", fcc_lang, "index.json"
- )
- with open(index_json_path) as course_index_str:
- superblock_dict = json.load(course_index_str)[fcc_lang]
-
- redirects = []
- for superblock in superblock_dict:
- course_list = superblock_dict[superblock]
- for course in course_list:
- meta_json_path = pathlib.Path(
- clientdir, "fcc/curriculum/", fcc_lang, superblock, course, "_meta.json"
- )
- challenges = json.loads(meta_json_path.read_text())["challenges"]
- for challenge in challenges:
- title = challenge["title"]
- redirects.append(
- (f'{fcc_lang}/{superblock}/{course}/{challenge["slug"]}', title)
- )
-
- return OrderedDict(redirects).items()
-
-
-def build(arguments):
- clientdir = pathlib.Path(arguments.clientdir)
- outpath = arguments.outzim
- language = arguments.language
- name = arguments.name
- title = arguments.title
- description = arguments.description
- creator = arguments.creator or "freeCodeCamp"
- publisher = arguments.publisher or "openZIM"
-
- logger.info(
- f"Building {clientdir} for {language} => {outpath} - Version: {VERSION}"
- )
-
- fileList = []
-
- # Walk the tree and get a list of files we care about
- for file in clientdir.rglob("*"):
- if file.is_dir():
- continue
- if file.suffix == ".map":
- continue
- fileList.append(file)
-
- main_path = clientdir.joinpath("index.html").relative_to(clientdir)
-
- # Make sure the outpath directory exists
-
- pathlib.Path(outpath).parent.mkdir(parents=True, exist_ok=True)
-
- with Creator(outpath, main_path.as_posix()).config_metadata(
- Name=name,
- Title=title,
- Publisher=publisher,
- Date=datetime.now(),
- Creator=creator,
- Description=description,
- Language=language,
- Tags=";".join(["FCC", "freeCodeCamp"]),
- Scraper=f"fcc2zim V{VERSION}",
- Illustration_48x48_at_1=logo_path.read_bytes(),
- ) as creator:
- for file in fileList:
- print(file)
- path = pathlib.Path(file).relative_to(clientdir).as_posix()
- creator.add_item_for(path, fpath=file)
-
- for redir_slug, redir_title in build_curriculum_redirects(clientdir, language):
- print("Redirect", redir_slug)
- redirect_path = f"{redir_slug}"
- redirect_url = redir_slug.count("/") * "../" + f"index.html#{redir_slug}"
- content = (
- f"
{redir_title}"
- f''
- f""
- )
- creator.add_item_for(
- redirect_path,
- content=bytes(content, "utf-8"),
- title=redir_title,
- mimetype="text/html",
- is_front=True,
- )
- # Example index.html#/english/regular-expressions/extract-matches
diff --git a/openzim/fcctozim/fetch.py b/openzim/fcctozim/fetch.py
deleted file mode 100644
index a1bdd83..0000000
--- a/openzim/fcctozim/fetch.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import pathlib
-import shutil
-import zipfile
-
-import requests
-
-
-def fetch_command(arguments):
- force = arguments.force or False
- tmpdir = pathlib.Path(arguments.tmpdir or "./tmp")
- url = "https://github.com/freeCodeCamp/freeCodeCamp/archive/refs/heads/main.zip"
- zip_path = tmpdir / "main.zip"
- curriculum_path = tmpdir / "curriculum"
-
- curriculum_path.mkdir(parents=True, exist_ok=True)
-
- # Don't redownload the file if we already have it (it's a large file)
- if force or not zip_path.exists():
- resp = requests.get(url, allow_redirects=True)
- zip_path.write_bytes(resp.content)
-
- shutil.rmtree(curriculum_path)
-
- with zipfile.ZipFile(zip_path, "r") as zip_ref:
- members = [
- member
- for member in zip_ref.namelist()
- if member.startswith("freeCodeCamp-main/curriculum/")
- or member.startswith("freeCodeCamp-main/client/i18n/locales")
- ]
- zip_ref.extractall(members=members, path=curriculum_path)
- print(f"Extracted {len(members)} files")
diff --git a/openzim/lint_requirements.txt b/openzim/lint_requirements.txt
deleted file mode 100644
index eb3cbe9..0000000
--- a/openzim/lint_requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-flake8==6.0.0
-isort==5.12.0
-black==23.3.0
\ No newline at end of file
diff --git a/openzim/pypi-readme.rst b/openzim/pypi-readme.rst
deleted file mode 100644
index 9ba5d13..0000000
--- a/openzim/pypi-readme.rst
+++ /dev/null
@@ -1,71 +0,0 @@
-=============
-fcc2zim
-=============
-
-@TODO Update this for fcc2zim usage
-
-A scraper that downloads the whole JS course material for FCC
-(http://freecodecamp.org) and puts it into a locally browsable
-directory and then in a ZIM file (http://www.openzim.org), a clean and
-user friendly format for storing content for offline usage.
-
-------------
-Dependencies
-------------
-
-Ubuntu/debian
--------------
-
-.. code-block:: sh
-
- python-pip python-dev libxml2-dev libxslt-dev advancecomp jpegoptim pngquant p7zip-full gifsicle
-
-
-macOS
------
-
-.. code-block:: sh
-
- brew install advancecomp jpegoptim pngquant p7zip gifsicle
-
-------
-Usage
-------
-
-.. code-block:: sh
-
- fcc2zim
-
-By default (no argument), it runs all the steps: download, parse, export and zim.
-
-
-.. code-block:: sh
-
- -h --help Display this help message
- -y --wipe-db Do not wipe the DB during parse stage
- -F --force Redo step even if target already exist
-
- -l --languages= Comma-separated list of lang codes to filter export to (preferably ISO 639-1, else ISO 639-3)
- -f --formats= Comma-separated list of formats to filter export to (epub, html, pdf, all)
-
- -m --mirror= Use URL as base for all downloads.
- -r --rdf-folder= Don't download rdf-files.tar.bz2 and use extracted folder instead
- -e --static-folder= Use-as/Write-to this folder static HTML
- -z --zim-file= Write ZIM into this file path
- -t --zim-title= Set ZIM title
- -n --zim-desc= Set ZIM description
- -d --dl-folder= Folder to use/write-to downloaded ebooks
- -u --rdf-url= Alternative rdf-files.tar.bz2 URL
- -b --books= Execute the processes for specific books, separated by commas, or dashes for intervals
- -c --concurrency= Number of concurrent process for download and parsing tasks
-
- -x --zim-title= Custom title for the ZIM file
- -q --zim-desc= Custom description for the ZIM file
-
- --check Check dependencies
- --prepare Download & extract rdf-files.tar.bz2
- --parse Parse all RDF files and fill-up the DB
- --download Download ebooks based on filters
- --export Export downloaded content to zim-friendly static HTML
- --dev Exports *just* Home+JS+CSS files (overwritten by --zim step)
- --zim Create a ZIM file
diff --git a/openzim/requirements.txt b/openzim/requirements.txt
deleted file mode 100644
index 0bbadea..0000000
--- a/openzim/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-zimscraperlib>=3.1.0
-requests>=2.28.2
-pyyaml>=6.0.0
\ No newline at end of file
diff --git a/openzim/setup.py b/openzim/setup.py
deleted file mode 100644
index 44d6594..0000000
--- a/openzim/setup.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# vim: ai ts=4 sts=4 et sw=4 nu
-
-""" Project Gutemberg ZIM creator for Offline Use """
-
-import pathlib
-from codecs import open
-
-from fcc2zim import VERSION
-from setuptools import find_packages, setup
-
-with open("requirements.pip", "r") as f:
- requirements = [line.strip() for line in f.readlines() if len(line.strip())]
-with open(pathlib.Path(pathlib.Path.parent, "README.md"), "r", "utf-8") as f:
- readme = f.read()
-
-setup(
- name="fcc2zim",
- version=VERSION,
- description=__doc__,
- author="Kiwix",
- author_email="reg@kiwix.org",
- long_description=readme,
- long_description_content_type="text/markdown",
- url="http://github.com/openzim/fcc",
- keywords="fcc zim kiwix openzim offline",
- license="GPL-3.0",
- packages=find_packages("."),
- zip_safe=False,
- platforms="any",
- include_package_data=True,
- data_files=["LICENSE", "requirements.pip"],
- package_dir={"fcc": "fcc"},
- install_requires=requirements,
- scripts=["fcc2zim"],
- classifiers=[
- "Intended Audience :: Developers",
- "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
- "Programming Language :: Python",
- "Programming Language :: Python :: 2.7",
- "Programming Language :: Python :: 3.6",
- ],
-)
diff --git a/openzim/.dockerignore b/scraper/.dockerignore
similarity index 100%
rename from openzim/.dockerignore
rename to scraper/.dockerignore
diff --git a/scraper/pyproject.toml b/scraper/pyproject.toml
new file mode 100644
index 0000000..9cda4db
--- /dev/null
+++ b/scraper/pyproject.toml
@@ -0,0 +1,223 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "fcc2zim"
+authors = [
+ { name = "Kiwix", email = "dev@kiwix.org" },
+]
+keywords = ["fcc","freecodecamp","zim","kiwix","openzim","offline"]
+requires-python = ">=3.11"
+description = "Make ZIM files from freeCodeCamp courses"
+readme = "../README.md"
+license = {text = "GPL-3.0-or-later"}
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.11",
+ "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
+]
+dependencies = [
+ "zimscraperlib==3.1.1",
+ "requests==2.31.0",
+ "PyYAML==6.0.1",
+]
+dynamic = ["version"]
+
+[project.optional-dependencies]
+scripts = [
+ "invoke==2.2.0",
+]
+lint = [
+ "black==23.7.0",
+ "ruff==0.0.285",
+]
+check = [
+ "pyright==1.1.323",
+]
+test = [
+ "pytest==7.4.0",
+ "coverage==7.3.0",
+]
+dev = [
+ "pre-commit==3.3.3",
+ "debugpy==1.6.7",
+ "fcc2zim[scripts]",
+ "fcc2zim[lint]",
+ "fcc2zim[test]",
+ "fcc2zim[check]",
+]
+
+[project.urls]
+Homepage = "https://github.com/openzim/freecodecamp"
+Donate = "https://www.kiwix.org/en/support-us/"
+
+[project.scripts]
+# Object reference syntax is "<importable module>:<object>": name the module that
+# defines main() so the entry point does not rely on fcc2zim/__init__.py (which is
+# empty) exposing `entrypoint` as a package attribute.
+fcc2zim = "fcc2zim.entrypoint:main"
+
+[tool.hatch.version]
+path = "src/fcc2zim/__about__.py"
+
+[tool.hatch.envs.default]
+features = ["dev"]
+
+[tool.hatch.envs.test]
+features = ["scripts", "test"]
+
+[tool.hatch.envs.test.scripts]
+run = "inv test --args '{args}'"
+run-cov = "inv test-cov --args '{args}'"
+report-cov = "inv report-cov"
+coverage = "inv coverage --args '{args}'"
+html = "inv coverage --html --args '{args}'"
+
+[tool.hatch.envs.lint]
+template = "lint"
+skip-install = false
+features = ["scripts", "lint"]
+
+[tool.hatch.envs.lint.scripts]
+black = "inv lint-black --args '{args}'"
+ruff = "inv lint-ruff --args '{args}'"
+all = "inv lintall --args '{args}'"
+fix-black = "inv fix-black --args '{args}'"
+fix-ruff = "inv fix-ruff --args '{args}'"
+fixall = "inv fixall --args '{args}'"
+
+[tool.hatch.envs.check]
+features = ["scripts", "check"]
+
+[tool.hatch.envs.check.scripts]
+pyright = "inv check-pyright --args '{args}'"
+all = "inv checkall --args '{args}'"
+
+[tool.black]
+line-length = 88
+target-version = ['py311']
+
+[tool.ruff]
+target-version = "py311"
+line-length = 88
+src = ["src"]
+select = [
+ "A", # flake8-builtins
+ # "ANN", # flake8-annotations
+ "ARG", # flake8-unused-arguments
+ # "ASYNC", # flake8-async
+ "B", # flake8-bugbear
+ # "BLE", # flake8-blind-except
+ "C4", # flake8-comprehensions
+ "C90", # mccabe
+ # "COM", # flake8-commas
+ # "D", # pydocstyle
+ # "DJ", # flake8-django
+ "DTZ", # flake8-datetimez
+ "E", # pycodestyle (default)
+ "EM", # flake8-errmsg
+ # "ERA", # eradicate
+ # "EXE", # flake8-executable
+ "F", # Pyflakes (default)
+ # "FA", # flake8-future-annotations
+ "FBT", # flake8-boolean-trap
+ # "FLY", # flynt
+ # "G", # flake8-logging-format
+ "I", # isort
+ "ICN", # flake8-import-conventions
+ # "INP", # flake8-no-pep420
+ # "INT", # flake8-gettext
+ "ISC", # flake8-implicit-str-concat
+ "N", # pep8-naming
+ # "NPY", # NumPy-specific rules
+ # "PD", # pandas-vet
+ # "PGH", # pygrep-hooks
+ # "PIE", # flake8-pie
+ # "PL", # Pylint
+ "PLC", # Pylint: Convention
+ "PLE", # Pylint: Error
+ "PLR", # Pylint: Refactor
+ "PLW", # Pylint: Warning
+ # "PT", # flake8-pytest-style
+ # "PTH", # flake8-use-pathlib
+ # "PYI", # flake8-pyi
+ "Q", # flake8-quotes
+ # "RET", # flake8-return
+ # "RSE", # flake8-raise
+ "RUF", # Ruff-specific rules
+ "S", # flake8-bandit
+ # "SIM", # flake8-simplify
+ # "SLF", # flake8-self
+ "T10", # flake8-debugger
+ "T20", # flake8-print
+ # "TCH", # flake8-type-checking
+ # "TD", # flake8-todos
+ "TID", # flake8-tidy-imports
+ # "TRY", # tryceratops
+ "UP", # pyupgrade
+ "W", # pycodestyle
+ "YTT", # flake8-2020
+]
+ignore = [
+ # Allow non-abstract empty methods in abstract base classes
+ "B027",
+ # Allow use of date.today
+ "DTZ011",
+ # Remove flake8-errmsg since we consider they bloat the code and provide limited value
+ "EM",
+ # Allow boolean positional values in function calls, like `dict.get(... True)`
+ "FBT003",
+ # Ignore checks for possible passwords
+ "S105", "S106", "S107",
+ # Ignore warnings on subprocess.run / popen
+ "S603",
+ # Ignore complexity
+ "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",
+]
+unfixable = [
+ # Don't touch unused imports
+ "F401",
+]
+
+[tool.ruff.isort]
+known-first-party = ["fcc2zim"]
+
+[tool.ruff.flake8-bugbear]
+# add exceptions to B008 for fastapi.
+extend-immutable-calls = ["fastapi.Depends", "fastapi.Query"]
+
+[tool.ruff.flake8-tidy-imports]
+ban-relative-imports = "all"
+
+[tool.ruff.per-file-ignores]
+# Tests can use magic values, assertions, and relative imports
+"tests/**/*" = ["PLR2004", "S101", "TID252"]
+
+[tool.pytest.ini_options]
+minversion = "7.4"
+testpaths = ["tests"]
+pythonpath = [".", "src"]
+
+[tool.coverage.paths]
+fcc2zim = ["src/fcc2zim"]
+tests = ["tests"]
+
+[tool.coverage.run]
+source_pkgs = ["fcc2zim"]
+branch = true
+parallel = true
+omit = [
+ "src/fcc2zim/__about__.py",
+]
+
+[tool.coverage.report]
+exclude_lines = [
+ "no cov",
+ "if __name__ == .__main__.:",
+ "if TYPE_CHECKING:",
+]
+
+[tool.pyright]
+include = ["src", "tests", "tasks.py"]
+exclude = [".env/**", ".venv/**"]
+extraPaths = ["src"]
+pythonVersion = "3.11"
+typeCheckingMode="basic"
diff --git a/scraper/src/fcc2zim/__about__.py b/scraper/src/fcc2zim/__about__.py
new file mode 100644
index 0000000..4e8976a
--- /dev/null
+++ b/scraper/src/fcc2zim/__about__.py
@@ -0,0 +1 @@
+__version__ = "1.0.0-dev0"
diff --git a/client/.eslintignore b/scraper/src/fcc2zim/__init__.py
similarity index 100%
rename from client/.eslintignore
rename to scraper/src/fcc2zim/__init__.py
diff --git a/scraper/src/fcc2zim/__main__.py b/scraper/src/fcc2zim/__main__.py
new file mode 100644
index 0000000..85a0eb7
--- /dev/null
+++ b/scraper/src/fcc2zim/__main__.py
@@ -0,0 +1,4 @@
+from fcc2zim.entrypoint import main
+
+# Run the scraper when the package is executed as a script (`python -m fcc2zim`)
+if __name__ == "__main__":
+    main()
diff --git a/openzim/fcc_48.png b/scraper/src/fcc2zim/assets/fcc_48.png
similarity index 100%
rename from openzim/fcc_48.png
rename to scraper/src/fcc2zim/assets/fcc_48.png
diff --git a/scraper/src/fcc2zim/build.py b/scraper/src/fcc2zim/build.py
new file mode 100644
index 0000000..549b14e
--- /dev/null
+++ b/scraper/src/fcc2zim/build.py
@@ -0,0 +1,91 @@
+import json
+from collections import OrderedDict
+from pathlib import Path
+
+from zimscraperlib.zim import Creator
+
+from fcc2zim.constants import Global
+
+
+def build_curriculum_redirects(curriculum_dist_dir: Path, fcc_lang: str):
+ """
+ Build the list of redirects from challenge URL to Vite hash URL
+
+ The Vite app uses its own router to navigate. We have a single HTML file, but we
+ need an URL for each challenge for the zim search to work.
+ This builds the list of redirect needed fron the challenge URL to Vite hash URL.
+ """
+ index_json_path = curriculum_dist_dir.joinpath("curriculum", fcc_lang, "index.json")
+ with open(index_json_path) as course_index_str:
+ superblock_dict = json.load(course_index_str)[fcc_lang]
+
+ redirects = []
+ for superblock in superblock_dict:
+ course_list = superblock_dict[superblock]
+ for course in course_list:
+ meta_json_path = Path(
+ curriculum_dist_dir,
+ "curriculum",
+ fcc_lang,
+ superblock,
+ course,
+ "_meta.json",
+ )
+ challenges = json.loads(meta_json_path.read_text())["challenges"]
+ for challenge in challenges:
+ title = challenge["title"]
+ redirects.append(
+ (f'{fcc_lang}/{superblock}/{course}/{challenge["slug"]}', title)
+ )
+
+ return OrderedDict(redirects).items()
+
+
+def build_command(
+ zimui_dist_dir: Path,
+ fcc_lang: str,
+ creator: Creator,
+ curriculum_dist_dir: Path,
+):
+ Global.logger.info("Scraper: build phase starting")
+
+ # Add zimui files
+ for file in zimui_dist_dir.rglob("*"):
+ if file.is_dir():
+ continue
+ path = str(Path(file).relative_to(zimui_dist_dir))
+ Global.logger.debug(f"Adding {path} to ZIM")
+ creator.add_item_for(path, fpath=file)
+
+ # Add prebuild generated curriculum file
+ for file in curriculum_dist_dir.rglob("*"):
+ if file.is_dir():
+ continue
+ path = str(Path("fcc").joinpath(Path(file).relative_to(curriculum_dist_dir)))
+ Global.logger.debug(f"Adding {path} to ZIM")
+ creator.add_item_for(path, fpath=file)
+
+ for redir_slug, redir_title in build_curriculum_redirects(
+ curriculum_dist_dir=curriculum_dist_dir, fcc_lang=fcc_lang
+ ):
+ redirect_path = f"{redir_slug}"
+ redirect_url = redir_slug.count("/") * "../" + f"index.html#{redir_slug}"
+ content = (
+ f"{redir_title}"
+ f''
+ f""
+ )
+ Global.logger.debug(
+ f"Redirecting {redirect_path} to {redirect_url} for slug {redir_slug}"
+ f"and title {redir_title}",
+ )
+ creator.add_item_for(
+ redirect_path,
+ content=bytes(content, "utf-8"),
+ title=redir_title,
+ mimetype="text/html",
+ is_front=True,
+ )
+ # Example index.html#/english/regular-expressions/extract-matches
+
+ Global.logger.info("Scraper: build phase finished")
diff --git a/openzim/fcctozim/challenge.py b/scraper/src/fcc2zim/challenge.py
similarity index 90%
rename from openzim/fcctozim/challenge.py
rename to scraper/src/fcc2zim/challenge.py
index 32c88bc..2ccae30 100644
--- a/openzim/fcctozim/challenge.py
+++ b/scraper/src/fcc2zim/challenge.py
@@ -1,5 +1,4 @@
import pathlib
-from typing import Union
import yaml
@@ -15,14 +14,14 @@ def read_yaml_frontmatter(filename: pathlib.Path):
class Challenge:
- def __init__(self, fpath: Union[str, pathlib.Path]) -> None:
+ def __init__(self, fpath: str | pathlib.Path) -> None:
self.path = pathlib.Path(fpath)
self.course_slug = self.path.parent.stem
self.course_superblock = "-".join(self.path.parent.parent.stem.split("-")[1:])
self.language = self.path.parent.parent.parent.stem
self._frontmatter = None
- def id(self):
+ def identifier(self):
return str(self.frontmatter()["id"])
def title(self):
diff --git a/scraper/src/fcc2zim/constants.py b/scraper/src/fcc2zim/constants.py
new file mode 100644
index 0000000..ef2c2d0
--- /dev/null
+++ b/scraper/src/fcc2zim/constants.py
@@ -0,0 +1,32 @@
+import logging
+
+from zimscraperlib.logging import getLogger
+
+from fcc2zim.__about__ import __version__
+
+FCC_LANG_MAP = {
+ "ara": "arabic",
+ "cmn": "chinese",
+ "lzh": "chinese-traditional",
+ "eng": "english",
+ "spa": "espanol",
+ "deu": "german",
+ "ita": "italian",
+ "jpn": "japanese",
+ "por": "portuguese",
+ "ukr": "ukranian",
+}
+
+VERSION = __version__
+
+
+class Global:
+    """Mutable module-level state: the debug flag and the "fcc2zim" logger."""
+
+    # whether debug output is enabled; updated by set_debug()
+    debug = False
+    # created at INFO level; set_debug() replaces it to apply the requested level
+    logger: logging.Logger = getLogger("fcc2zim", level=logging.INFO)
+
+
+def set_debug(*, debug: bool):
+ Global.debug = debug
+ Global.logger = getLogger( # refresh logger to update log level
+ "fcc2zim", level=logging.DEBUG if Global.debug else logging.INFO
+ )
diff --git a/scraper/src/fcc2zim/entrypoint.py b/scraper/src/fcc2zim/entrypoint.py
new file mode 100644
index 0000000..8623a8f
--- /dev/null
+++ b/scraper/src/fcc2zim/entrypoint.py
@@ -0,0 +1,165 @@
+import argparse
+import datetime
+import functools
+import os
+
+from zimscraperlib.constants import (
+ MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
+)
+from zimscraperlib.constants import (
+ MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
+)
+
+from fcc2zim.constants import FCC_LANG_MAP, VERSION, Global, set_debug
+from fcc2zim.scraper import Scraper
+
+
+def log_and_sys_exit(func):
+ @functools.wraps(func)
+ def wrapper():
+ try:
+ func()
+ except SystemExit: # SystemExit has been asked for at lower level, simply do it
+ raise
+ except Exception as exc:
+ Global.logger.error(f"A fatal error occurred: {exc}")
+ Global.logger.exception(exc)
+ raise SystemExit(1) from exc
+
+ return wrapper
+
+
+@log_and_sys_exit
+def main():
+    """Parse the command line, configure logging, then build and run the Scraper.
+
+    The directory options default to the OUTPUT_DIR, BUILD_DIR and ZIMUI_DIST_DIR
+    environment variables when set. The phases to run are read from the DO_FETCH,
+    DO_PREBUILD and DO_BUILD environment variables rather than from the command
+    line.
+    """
+    parser = argparse.ArgumentParser(
+        prog="fcc2zim",
+        description="Scraper to create ZIM files from freeCodeCamp courses",
+    )
+
+    # --- what to scrape ---
+    parser.add_argument(
+        "--course",
+        type=str,
+        help="Course or course list (separated by commas)",
+        required=True,
+    )
+    parser.add_argument(
+        "--language",
+        type=str,
+        help="Curriculum language",
+        required=True,
+        choices=FCC_LANG_MAP.keys(),
+    )
+
+    # --- ZIM metadata ---
+    parser.add_argument(
+        "--name",
+        type=str,
+        help="ZIM name. Used as identifier and filename (date will be appended)",
+        required=True,
+    )
+    parser.add_argument(
+        "--title",
+        type=str,
+        # once Zimscraperlib > 3.1.1 is released, use constant from library
+        # instead of '30' magic number
+        help="Title of zim file (less than 30 chars)",
+        required=True,
+    )
+    parser.add_argument(
+        "--description",
+        type=str,
+        help=f"Description of ZIM file (less than {MAX_DESC_LENGTH} chars)",
+        required=True,
+    )
+    parser.add_argument(
+        "--long-description",
+        type=str,
+        help=f"Long description of ZIM file (less than {MAX_LONG_DESC_LENGTH} chars)",
+    )
+    parser.add_argument(
+        "--creator",
+        type=str,
+        help="Name of freeCodeCamp courses creator",
+        default="freeCodeCamp",
+    )
+    parser.add_argument(
+        "--publisher", type=str, help="Publisher of the zim file", default="OpenZIM"
+    )
+
+    # --- behaviour flags ---
+    parser.add_argument(
+        "--force",
+        help="Force a full reprocessing, not benefiting from any cached file",
+        action="store_true",
+        default=False,
+    )
+    parser.add_argument(
+        "--debug",
+        help="Enable verbose output",
+        action="store_true",
+        default=False,
+    )
+
+    # --- directories and files; the environment variable, when set, is the default ---
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        help="Output directory where zim file will be built",
+        default=os.getenv("OUTPUT_DIR", "../output"),
+    )
+    parser.add_argument(
+        "--build-dir",
+        type=str,
+        help="The build directory to hold temporary files during scraper operation",
+        default=os.getenv("BUILD_DIR", "../build"),
+    )
+    parser.add_argument(
+        "--zimui-dist-dir",
+        type=str,
+        help=(
+            "Directory containing Vite build output from the Zim UI Vue.JS application"
+        ),
+        default=os.getenv("ZIMUI_DIST_DIR", "../zimui/dist"),
+    )
+    # NOTE(review): this help text ends with "replaced by _", which reads as
+    # truncated; confirm the intended description of the {period} substitution
+    parser.add_argument(
+        "--zim-file",
+        type=str,
+        help="ZIM file name (based on --name if not provided), could contain {period}"
+        " placeholder which will be replaced by _",
+    )
+    parser.add_argument(
+        "--zip-path",
+        help="Path to zip file containing FCC courses",
+        type=str,
+    )
+    parser.add_argument(
+        "--version",
+        help="Display scraper version and exit",
+        action="version",
+        version=f"fcc2zim {VERSION}",
+    )
+
+    args = parser.parse_args()
+
+    # logged before set_debug() runs, i.e. with the logger still at its initial level
+    Global.logger.info(f"Starting fcc2zim {VERSION}")
+
+    set_debug(debug=args.debug)
+
+    scraper = Scraper(
+        # a phase flag is True only when its variable equals "true" (any casing)
+        do_fetch=os.getenv("DO_FETCH", "False").lower() == "true",
+        do_prebuild=os.getenv("DO_PREBUILD", "False").lower() == "true",
+        do_build=os.getenv("DO_BUILD", "False").lower() == "true",
+        zimui_dist_dir=args.zimui_dist_dir,
+        output_dir=args.output_dir,
+        build_dir=args.build_dir,
+        language=args.language,
+        name=args.name,
+        title=args.title,
+        description=args.description,
+        long_description=args.long_description,
+        content_creator=args.creator,
+        publisher=args.publisher,
+        zim_file=args.zim_file,
+        force=args.force,
+        course_csv=args.course,
+        zip_path=args.zip_path,
+        start_date=datetime.date.today(),
+    )
+
+    scraper.run()
+
+    Global.logger.info("Scraper completed")
diff --git a/scraper/src/fcc2zim/fetch.py b/scraper/src/fcc2zim/fetch.py
new file mode 100644
index 0000000..b43f700
--- /dev/null
+++ b/scraper/src/fcc2zim/fetch.py
@@ -0,0 +1,36 @@
+import shutil
+import zipfile
+from pathlib import Path
+
+import requests
+
+from fcc2zim.constants import Global
+
+
+def fetch_command(zip_path: Path, curriculum_raw_dir: Path, *, force: bool):
+ Global.logger.info("Scraper: fetch phase starting")
+ url = "https://github.com/freeCodeCamp/freeCodeCamp/archive/refs/heads/main.zip"
+
+ # Don't redownload the file if we already have it (it's a large file)
+ if force or not zip_path.exists():
+ Global.logger.debug(f"Download zip file to {zip_path}")
+ resp = requests.get(url, allow_redirects=True, timeout=5)
+ zip_path.write_bytes(resp.content)
+ else:
+ Global.logger.debug(f"Using existing zip file {zip_path}")
+
+ curriculum_raw_dir.mkdir(parents=True, exist_ok=True)
+ shutil.rmtree(curriculum_raw_dir)
+
+ Global.logger.debug("Extracting files")
+ with zipfile.ZipFile(zip_path, "r") as zip_ref:
+ members = [
+ member
+ for member in zip_ref.namelist()
+ if member.startswith("freeCodeCamp-main/curriculum/")
+ or member.startswith("freeCodeCamp-main/client/i18n/locales")
+ ]
+ zip_ref.extractall(members=members, path=curriculum_raw_dir)
+ Global.logger.info(f"Extracted {len(members)} files")
+ Global.logger.info(f"Fetched curriculum into {curriculum_raw_dir}")
+ Global.logger.info("Scraper: fetch phase finished")
diff --git a/openzim/fcctozim/prebuild.py b/scraper/src/fcc2zim/prebuild.py
similarity index 52%
rename from openzim/fcctozim/prebuild.py
rename to scraper/src/fcc2zim/prebuild.py
index 8fe44f9..67e0a70 100644
--- a/openzim/fcctozim/prebuild.py
+++ b/scraper/src/fcc2zim/prebuild.py
@@ -1,17 +1,16 @@
import json
-import pathlib
import shutil
-from typing import List
+from pathlib import Path
-from fcctozim import FCC_LANG_MAP
-from fcctozim.challenge import Challenge
+from fcc2zim.challenge import Challenge
+from fcc2zim.constants import Global
def get_challenges_for_lang(tmp_path, language="english"):
- return pathlib.Path(tmp_path, language).rglob("*.md")
+ return Path(tmp_path, language).rglob("*.md")
-def update_index(path: pathlib.Path, superblock: str, slug: str, language="english"):
+def update_index(path: Path, superblock: str, slug: str, language="english"):
index_path = path.joinpath("index.json")
if not index_path.exists():
index_path.write_bytes(json.dumps({}).encode("utf-8"))
@@ -33,17 +32,15 @@ def update_index(path: pathlib.Path, superblock: str, slug: str, language="engli
"""
-def write_locales_to_path(
- source_dir: pathlib.Path, outdir: pathlib.Path, language="english"
-):
- shutil.copytree(source_dir, outdir / "locales" / language)
+def write_locales_to_path(source_dir: Path, curriculumdir: Path, language="english"):
+ shutil.copytree(source_dir, curriculumdir / "locales" / language)
def write_course_to_path(
- challenge_list: List[Challenge],
+ challenge_list: list[Challenge],
superblock: str,
course_slug: str,
- outdir: pathlib.Path,
+ curriculumdir: Path,
):
"""Writes the course to the chosen path.
@@ -53,11 +50,11 @@ def write_course_to_path(
Finally, we udpate the root index.json file with the course, which allows
us to render a page listing all available courses
"""
- outdir.mkdir(parents=True, exist_ok=True)
+ curriculumdir.mkdir(parents=True, exist_ok=True)
meta = {"challenges": []}
for challenge in challenge_list:
- challenge_dest_path = outdir.joinpath(
+ challenge_dest_path = curriculumdir.joinpath(
challenge.course_superblock, challenge.course_slug
)
challenge_dest_path.mkdir(parents=True, exist_ok=True)
@@ -66,39 +63,48 @@ def write_course_to_path(
{"title": challenge.title(), "slug": challenge.path.stem}
)
- meta_path = outdir.joinpath(superblock, course_slug, "_meta.json")
+ meta_path = curriculumdir.joinpath(superblock, course_slug, "_meta.json")
meta_path.parent.mkdir(parents=True, exist_ok=True)
with open(meta_path, "w") as outfile:
json.dump(meta, outfile, indent=4)
# Create an index with a list of the courses
- update_index(outdir, superblock, course_slug, challenge_list[0].language)
+ update_index(curriculumdir, superblock, course_slug, challenge_list[0].language)
-def prebuild_command(arguments):
- """Writes out a structure of challenges to output dir:
+def prebuild_command(
+ course_csv: str,
+ fcc_lang: str,
+ curriculum_raw_dir: Path,
+ curriculum_dist_dir: Path,
+):
+ """Transform raw data in curriculum_raw_dir into pre-built data in
+ curriculum_dist_dir
- /output_dir/index.json => { 'english': {'superblock': ['basic-javascript'] } }
- /output_dir/english///_meta.json
+ E.g. if lang in english:
+ - curriculum_dist_dir/index.json
+ => { 'english': {'superblock': ['basic-javascript'] } }
+ - curriculum_dist_dir/english///_meta.json
=> { challenges: [{slug, title}] }
- /output_dir/english///{slug}.md
+ - curriculum_dist_dir/english///{slug}.md
"""
- course_list_str = str(arguments.course)
- outdir = pathlib.Path(arguments.outdir)
- lang = FCC_LANG_MAP[arguments.language]
- tmpdir = arguments.tmpdir or "./tmp"
- curriculum_dir = pathlib.Path(
- tmpdir, "curriculum", "freeCodeCamp-main", "curriculum", "challenges"
+ Global.logger.info("Scraper: prebuild phase starting")
+
+ curriculum_dist_dir.mkdir(parents=True, exist_ok=True)
+ shutil.rmtree(curriculum_dist_dir)
+
+ challenges_dir = curriculum_raw_dir.joinpath(
+ "freeCodeCamp-main", "curriculum", "challenges"
)
- locales_dir = pathlib.Path(
- tmpdir, "curriculum", "freeCodeCamp-main", "client", "i18n", "locales", lang
+ locales_dir = curriculum_raw_dir.joinpath(
+ "freeCodeCamp-main", "client", "i18n", "locales", fcc_lang
)
# eg. ['basic-javascript', 'debugging']
- for course in course_list_str.split(","):
- print(f"Prebuilding {course}")
+ for course in course_csv.split(","):
+ Global.logger.debug(f"Prebuilding {course}")
meta = json.loads(
- curriculum_dir.joinpath("_meta", course, "meta.json").read_text()
+ challenges_dir.joinpath("_meta", course, "meta.json").read_text()
)
# Get the order that the challenges should be completed in for
ids = [
@@ -107,24 +113,24 @@ def prebuild_command(arguments):
]
superblock = meta["superBlock"]
- challenge_list: List[Challenge] = []
- for file in get_challenges_for_lang(curriculum_dir, lang):
+ challenge_list: list[Challenge] = []
+ for file in get_challenges_for_lang(challenges_dir, fcc_lang):
challenge = Challenge(file)
if challenge.course_superblock != superblock:
continue
# ID is a UUID the Challenge, the only add it to the challenge list if it's
# a part of the course.
- if challenge.id() in ids:
+ if challenge.identifier() in ids:
challenge_list.append(challenge)
write_course_to_path(
- sorted(challenge_list, key=lambda x: ids.index(x.id())),
+ sorted(challenge_list, key=lambda x: ids.index(x.identifier())),
superblock,
course,
- outdir.joinpath("curriculum", lang),
+ curriculum_dist_dir.joinpath("curriculum", fcc_lang),
)
- print(f"Prebuilt {course}")
# Copy all the locales for this language
- write_locales_to_path(locales_dir, outdir, lang)
- print(f"Prebuilt curriculum into {outdir}")
+ write_locales_to_path(locales_dir, curriculum_dist_dir, fcc_lang)
+ Global.logger.info(f"Prebuilt curriculum into {curriculum_dist_dir}")
+ Global.logger.info("Scraper: prebuild phase finished")
diff --git a/scraper/src/fcc2zim/scraper.py b/scraper/src/fcc2zim/scraper.py
new file mode 100644
index 0000000..9430cef
--- /dev/null
+++ b/scraper/src/fcc2zim/scraper.py
@@ -0,0 +1,166 @@
+import datetime
+from pathlib import Path
+
+from zimscraperlib.zim import Creator
+
+from fcc2zim.build import build_command
+from fcc2zim.constants import FCC_LANG_MAP, VERSION, Global
+from fcc2zim.fetch import fetch_command
+from fcc2zim.prebuild import prebuild_command
+from fcc2zim.zimscraperlib_fork import compute_descriptions
+
+
+class Scraper:
+    """Run the fetch, prebuild and build phases that produce a freeCodeCamp ZIM.
+
+    All argument validation happens in the constructor so that configuration
+    problems surface before any phase starts; `run` then executes the enabled
+    phases.
+    """
+
+    def __init__(
+        self,
+        *,
+        do_fetch: bool,
+        do_prebuild: bool,
+        do_build: bool,
+        zimui_dist_dir: str,
+        output_dir: str,
+        build_dir: str,
+        language: str,
+        name: str,
+        title: str,
+        description: str,
+        long_description: str | None,
+        content_creator: str,
+        publisher: str,
+        zim_file: str | None,
+        force: bool,
+        course_csv: str,
+        zip_path: str | None,
+        start_date: datetime.date,
+    ):
+        """Validate the settings and prepare the directories and ZIM creator.
+
+        If none of `do_fetch`, `do_prebuild` and `do_build` is set, all three
+        phases are enabled. The ZIM creator is only created (and started) when
+        the build phase is enabled; otherwise `self.creator` stays `None`.
+
+        `zim_file`, when given, must be a bare filename; a `{period}`
+        placeholder in it is replaced by `start_date` formatted as `YYYY-MM`.
+        Without it, the ZIM is named `<name>_<period>.zim`. In both cases the
+        file is placed in `output_dir`.
+
+        Raises:
+            ValueError: `zimui_dist_dir` does not exist, `language` is not in
+                `FCC_LANG_MAP`, a description exceeds its maximum length, a
+                given `zip_path` does not exist, `zim_file` contains a path,
+                the ZIM file already exists and `force` is not set, or the
+                logo asset is missing.
+        """
+        self.creator = None
+
+        self.do_fetch = do_fetch
+        self.do_prebuild = do_prebuild
+        self.do_build = do_build
+
+        # no phase explicitly requested means "run them all"
+        if not (self.do_fetch + self.do_prebuild + self.do_build):
+            self.do_fetch = self.do_prebuild = self.do_build = True
+
+        self.zimui_dist_dir = Path(zimui_dist_dir)
+        if not self.zimui_dist_dir.exists():
+            raise ValueError(f"zimui_dist_dir {self.zimui_dist_dir} does not exists")
+
+        self.output_dir = Path(output_dir)
+        self.build_dir = Path(build_dir)
+        self.curriculum_raw_dir = self.build_dir.joinpath("curriculum-raw")
+        self.curriculum_dist_dir = self.build_dir.joinpath("curriculum-dist")
+
+        # Make sure the output and build directories exist
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.build_dir.mkdir(parents=True, exist_ok=True)
+
+        self.language = language
+        if self.language not in FCC_LANG_MAP:
+            raise ValueError(f"Unsupported language {self.language}")
+        self.fcc_lang = FCC_LANG_MAP[language]
+
+        self.name = name
+        self.title = title
+
+        self.description = description
+        self.long_description = long_description
+        # the description is passed both as default and as user value, so an
+        # over-long description is rejected instead of being truncated
+        self.description, self.long_description = compute_descriptions(
+            self.description, self.description, self.long_description
+        )
+
+        self.content_creator = content_creator
+        self.publisher = publisher
+        self.force = force
+        self.course_csv = course_csv
+        if not zip_path:
+            # default archive location; it is not required to exist yet
+            self.zip_path = self.build_dir.joinpath("main.zip")
+        else:
+            self.zip_path = Path(zip_path)
+            if not self.zip_path.exists():
+                raise ValueError(f"Zip file not found in {self.zip_path}")
+
+        # if we do not build the ZIM, we can stop here
+        if not self.do_build:
+            return
+
+        period = start_date.strftime("%Y-%m")
+        if zim_file:
+            self.zim_path = Path(zim_file.format(period=period))
+            # make sure we were given a filename and not a path
+            if Path(self.zim_path.name) != self.zim_path:
+                raise ValueError(f"zim_file is not a filename: {zim_file}")
+        else:
+            self.zim_path = Path(f"{name}_{period}.zim")
+
+        # build full path
+        self.zim_path = self.output_dir.joinpath(self.zim_path)
+
+        if self.zim_path.exists():
+            if not self.force:
+                raise ValueError(f"ZIM file {self.zim_path} already exist.")
+            Global.logger.info(f"Removing existing ZIM file {self.zim_path}")
+            self.zim_path.unlink()
+        else:
+            Global.logger.info(f"ZIM path: {self.zim_path}")
+
+        logo_path = Path(__file__).parent.joinpath("assets", "fcc_48.png")
+        if not logo_path.exists():
+            raise ValueError(f"Logo not found at {logo_path}")
+
+        self.creator = Creator(self.zim_path, "index.html").config_metadata(
+            Name=self.name,
+            Title=self.title,
+            Publisher=self.publisher,
+            Date=start_date,
+            Creator=self.content_creator,
+            Description=self.description,
+            LongDescription=self.long_description,
+            Language=self.language,
+            Tags=";".join(["FCC", "freeCodeCamp"]),
+            Scraper=f"fcc2zim v{VERSION}",
+            Illustration_48x48_at_1=logo_path.read_bytes(),
+        )
+
+        # start creator early to detect any problem early as well
+        self.creator.start()
+
+    def run(self):
+        """Execute the enabled phases and finish the ZIM on success.
+
+        On failure the creator (if any) is flagged so that it cannot be
+        finished, the problem is logged and the process exits.
+
+        Raises:
+            SystemExit: code 3 on KeyboardInterrupt, code 2 on any other error.
+        """
+        try:
+            self.run_commands()
+        except (Exception, KeyboardInterrupt) as exc:
+            # KeyboardInterrupt derives from BaseException, not Exception: it
+            # must be listed explicitly for its dedicated branch to be reachable
+            if self.creator:
+                self.creator.can_finish = False
+            if isinstance(exc, KeyboardInterrupt):
+                Global.logger.error("KeyboardInterrupt, exiting.")
+                raise SystemExit(3) from exc
+            else:
+                Global.logger.error(f"Interrupting process due to error: {exc}")
+                Global.logger.exception(exc)
+                raise SystemExit(2) from exc
+        else:
+            # creator (and zim_path) only exist when the build phase is enabled
+            if self.creator:
+                self.creator.finish()
+                Global.logger.info(f"Finished creating Zim at {self.zim_path}")
+
+    def run_commands(self):
+        """Run the enabled phases in order: fetch, then prebuild, then build."""
+        if self.do_fetch:
+            fetch_command(
+                force=self.force,
+                curriculum_raw_dir=self.curriculum_raw_dir,
+                zip_path=self.zip_path,
+            )
+        if self.do_prebuild:
+            prebuild_command(
+                fcc_lang=self.fcc_lang,
+                course_csv=self.course_csv,
+                curriculum_raw_dir=self.curriculum_raw_dir,
+                curriculum_dist_dir=self.curriculum_dist_dir,
+            )
+        if self.do_build:
+            build_command(
+                fcc_lang=self.fcc_lang,
+                creator=self.creator,
+                zimui_dist_dir=self.zimui_dist_dir,
+                curriculum_dist_dir=self.curriculum_dist_dir,
+            )
diff --git a/scraper/src/fcc2zim/zimscraperlib_fork.py b/scraper/src/fcc2zim/zimscraperlib_fork.py
new file mode 100644
index 0000000..0caa9d1
--- /dev/null
+++ b/scraper/src/fcc2zim/zimscraperlib_fork.py
@@ -0,0 +1,62 @@
+from zimscraperlib.constants import (
+ MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
+)
+from zimscraperlib.constants import (
+ MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
+)
+
+# This function will be released in zimscraperlib 3.1.2
+# Until then, it is forked here for convenience
+
+
+def compute_descriptions(
+    default_description: str,
+    user_description: str | None,
+    user_long_description: str | None,
+) -> tuple[str, str | None]:
+    """Return a (description, long_description) pair fitting ZIM length limits.
+
+    User-provided values always win: they are validated against the maximum
+    lengths and returned untouched, or a ValueError is raised when too long.
+
+    Missing values are derived from the default description:
+    - no user description: the default description is used, cut to the maximum
+      description length (ending with an ellipsis when it had to be cut);
+    - no user long description: only when the default description does not fit
+      in the description field, it is used as long description, cut to the
+      maximum long description length (again ending with an ellipsis when cut);
+      otherwise the long description is returned as received.
+
+    args:
+        default_description: fallback text used when user values are not set
+        user_description: description explicitly chosen by the user, if any
+        user_long_description: long description explicitly chosen by the user,
+                               if any
+
+    Raises ValueError when a user-provided value exceeds its maximum length.
+
+    Returns a tuple of (description, long_description)
+    """
+
+    def shorten(text: str, limit: int) -> str:
+        # keep at most `limit` characters; when something had to be dropped, the
+        # last kept character is replaced by an ellipsis
+        kept = text[0:limit]
+        if len(text) > limit:
+            return kept[:-1] + "…"
+        return kept
+
+    if user_description and len(user_description) > MAX_DESC_LENGTH:
+        raise ValueError(
+            f"Description too long ({len(user_description)}>{MAX_DESC_LENGTH})"
+        )
+    if user_long_description and len(user_long_description) > MAX_LONG_DESC_LENGTH:
+        raise ValueError(
+            f"LongDescription too long ({len(user_long_description)}"
+            f">{MAX_LONG_DESC_LENGTH})"
+        )
+
+    long_description = user_long_description
+    if not long_description and len(default_description) > MAX_DESC_LENGTH:
+        long_description = shorten(default_description, MAX_LONG_DESC_LENGTH)
+
+    description = user_description or shorten(default_description, MAX_DESC_LENGTH)
+
+    return (description, long_description)
diff --git a/scraper/tasks.py b/scraper/tasks.py
new file mode 100644
index 0000000..90854e8
--- /dev/null
+++ b/scraper/tasks.py
@@ -0,0 +1,109 @@
+# pyright: strict, reportUntypedFunctionDecorator=false
+import os
+
+from invoke.context import Context
+from invoke.tasks import task # pyright: ignore [reportUnknownVariableType]
+
+# request a pseudo-terminal for commands only when the CI variable is unset or empty
+use_pty = not os.environ.get("CI", "")
+
+
+@task(optional=["args"], help={"args": "pytest additional arguments"})
+def test(ctx: Context, args: str = ""):
+    """Run the test suite with pytest, without coverage measurement.
+
+    `args` is appended verbatim to the pytest command line.
+    """
+    command = f"pytest {args}"
+    ctx.run(command, pty=use_pty)
+
+
+@task(optional=["args"], help={"args": "pytest additional arguments"})
+def test_cov(ctx: Context, args: str = ""):
+    """Run the test suite under `coverage run` to collect coverage data.
+
+    `args` is appended verbatim to the pytest command line.
+    """
+    command = f"coverage run -m pytest {args}"
+    ctx.run(command, pty=use_pty)
+
+
+@task(optional=["html"], help={"html": "flag to export html report"})
+def report_cov(ctx: Context, *, html: bool = False):
+    """Combine coverage data, print the report and optionally export it as HTML."""
+    # warn=True is only passed for the combine step; the report steps are not
+    # given that leniency
+    ctx.run("coverage combine", warn=True, pty=use_pty)
+    follow_ups = ["coverage report --show-missing"]
+    if html:
+        follow_ups.append("coverage html")
+    for command in follow_ups:
+        ctx.run(command, pty=use_pty)
+
+
+@task(
+    optional=["args", "html"],
+    help={
+        "args": "pytest additional arguments",
+        "html": "flag to export html report",
+    },
+)
+def coverage(ctx: Context, args: str = "", *, html: bool = False):
+    """Run the tests under coverage, then report the collected coverage.
+
+    Chains the `test_cov` and `report_cov` tasks, forwarding `args` to the
+    former and `html` to the latter.
+    """
+    test_cov(ctx, args)
+    report_cov(ctx, html=html)
+
+
+@task(optional=["args"], help={"args": "black additional arguments"})
+def lint_black(ctx: Context, args: str = "."):
+    """Check formatting with black (no file is modified, a diff is shown)."""
+    target = args or "."  # needed for hatch script
+    for command in ("black --version", f"black --check --diff {target}"):
+        ctx.run(command, pty=use_pty)
+
+
+@task(optional=["args"], help={"args": "ruff additional arguments"})
+def lint_ruff(ctx: Context, args: str = "."):
+    """Check the code against ruff rules (no automatic fix is applied)."""
+    target = args or "."  # needed for hatch script
+    for command in ("ruff --version", f"ruff check {target}"):
+        ctx.run(command, pty=use_pty)
+
+
+@task(
+    optional=["args"],
+    help={
+        "args": "linting tools (black, ruff) additional arguments, typically a path",
+    },
+)
+def lintall(ctx: Context, args: str = "."):
+    """Run every linter (black, then ruff) in check mode."""
+    target = args or "."  # needed for hatch script
+    for linter in (lint_black, lint_ruff):
+        linter(ctx, target)
+
+
+@task(optional=["args"], help={"args": "check tools (pyright) additional arguments"})
+def check_pyright(ctx: Context, args: str = ""):
+    """Check static types with pyright.
+
+    The pyright version is printed first; `args` is appended verbatim to the
+    pyright command line.
+    """
+    # both commands use the same pty setting as every other task of this file
+    ctx.run("pyright --version", pty=use_pty)
+    ctx.run(f"pyright {args}", pty=use_pty)
+
+
+@task(optional=["args"], help={"args": "check tools (pyright) additional arguments"})
+def checkall(ctx: Context, args: str = ""):
+    """Run every static check; currently this is the pyright check only."""
+    check_pyright(ctx, args=args)
+
+
+@task(optional=["args"], help={"args": "black additional arguments"})
+def fix_black(ctx: Context, args: str = "."):
+    """Reformat the code in place with black."""
+    target = args or "."  # needed for hatch script
+    command = f"black {target}"
+    ctx.run(command, pty=use_pty)
+
+
+@task(optional=["args"], help={"args": "ruff additional arguments"})
+def fix_ruff(ctx: Context, args: str = "."):
+    """Fix all ruff rules that can be fixed automatically."""
+    args = args or "."  # needed for hatch script
+    # use the explicit `check` subcommand, like lint_ruff does: the bare
+    # `ruff --fix <path>` form is deprecated in ruff's CLI
+    ctx.run(f"ruff check --fix {args}", pty=use_pty)
+
+
+@task(
+    optional=["args"],
+    help={
+        "args": "linting tools (black, ruff) additional arguments, typically a path",
+    },
+)
+def fixall(ctx: Context, args: str = "."):
+    """Apply every automatic fix (black, then ruff), then re-run the linters."""
+    target = args or "."  # needed for hatch script
+    for step in (fix_black, fix_ruff, lintall):
+        step(ctx, target)
diff --git a/scraper/tests/test_dummy.py b/scraper/tests/test_dummy.py
new file mode 100644
index 0000000..ac33693
--- /dev/null
+++ b/scraper/tests/test_dummy.py
@@ -0,0 +1,7 @@
+from fcc2zim.constants import VERSION
+
+
+# Smoke test: only proves that the package can be imported and that the test and
+# coverage tooling is wired up, so that real tests can be added on top.
+def test_version():
+    assert VERSION
+    assert len(VERSION) > 0
diff --git a/scraper/tests/test_scraper.py b/scraper/tests/test_scraper.py
new file mode 100644
index 0000000..8800541
--- /dev/null
+++ b/scraper/tests/test_scraper.py
@@ -0,0 +1,235 @@
+import datetime
+from pathlib import Path
+from tempfile import NamedTemporaryFile, TemporaryDirectory
+
+import pytest
+
+from fcc2zim.scraper import Scraper
+
+DEFAULT_START_DATE = datetime.date(2023, 8, 23)
+# scratch directory shared by all the tests of this module
+WORKING_DIR = TemporaryDirectory(prefix="fcc2zim_tests_")
+WORKING_DIR_PATH = Path(WORKING_DIR.name)
+# the scraper refuses a missing zimui dist directory, so it is created up front
+ZIMUI_DIST_PATH = WORKING_DIR_PATH / "zimui" / "dist"
+ZIMUI_DIST_PATH.mkdir(parents=True, exist_ok=True)
+# deliberately not created here: test_init_ok checks that the scraper creates them
+BUILD_PATH = WORKING_DIR_PATH / "build"
+OUTPUT_PATH = WORKING_DIR_PATH / "output"
+
+# Filler text; the tests below slice it (e.g. LONG_TEXT[:81]) to build metadata
+# values of a chosen length.
+LONG_TEXT = (
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor "
+ "incididunt ut labore et dolore magna aliqua. At erat pellentesque adipiscing "
+ "commodo elit at imperdiet. Rutrum tellus pellentesque eu tincidunt tortor aliquam"
+ " nulla facilisi. Eget lorem dolor sed viverra ipsum nunc. Ipsum nunc aliquet "
+ "bibendum enim facilisis gravida neque convallis. Aliquam malesuada bibendum arcu "
+ "vitae elementum curabitur. Platea dictumst quisque sagittis purus sit amet "
+ "volutpat. Blandit libero volutpat sed cras ornare. In eu mi bibendum neque "
+ "egestas. Egestas dui id ornare arcu odio. Pulvinar neque laoreet suspendisse "
+ "interdum. Fames ac turpis egestas integer eget aliquet nibh praesent tristique. Et"
+ " egestas quis ipsum suspendisse ultrices gravida dictum fusce. Malesuada fames ac "
+ "turpis egestas. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada "
+ "proin libero. In arcu cursus euismod quis viverra. Faucibus in ornare quam viverra"
+ ". Curabitur vitae nunc sed velit dignissim sodales ut eu sem. Velit scelerisque in"
+ " dictum non consectetur a erat nam. Proin fermentum leo vel orci porta non. Fames"
+ " ac turpis egestas sed tempus. Vitae justo eget magna fermentum iaculis eu non. "
+ "Imperdiet massa tincidunt nunc pulvinar sapien et ligula. Laoreet sit amet cursus "
+ "sit amet dictum sit amet. Quis hendrerit dolor magna eget. Orci ac auctor augue "
+ "mauris augue. Consequat interdum varius sit amet mattis. At ultrices mi tempus "
+ "imperdiet nulla malesuada pellentesque elit. Volutpat est velit egestas dui. "
+ "Potenti nullam ac tortor vitae. At tempor commodo ullamcorper a lacus vestibulum "
+ "sed arcu non. Duis ut diam quam nulla. Vestibulum mattis ullamcorper velit sed "
+ "ullamcorper. Sit amet commodo nulla facilisi nullam vehicula. Faucibus purus in "
+ "massa tempor nec feugiat. Sem fringilla ut morbi tincidunt augue interdum velit. "
+ "Etiam dignissim diam quis enim lobortis scelerisque fermentum dui. Nunc vel risus "
+ "commodo viverra maecenas accumsan. Aenean sed adipiscing diam donec adipiscing "
+ "tristique. Maecenas accumsan lacus vel facilisis volutpat est velit egestas. Nulla"
+ " aliquet porttitor lacus luctus accumsan tortor posuere ac. Habitant morbi "
+ "tristique senectus et netus et. Eget mi proin sed libero enim sed faucibus turpis "
+ "in. Vulputate enim nulla aliquet porttitor lacus. Dui ut ornare lectus sit amet "
+ "est. Quam lacus suspendisse faucibus interdum posuere. Sagittis orci a scelerisque"
+ " purus semper eget duis at tellus. Tellus molestie nunc non blandit massa. Feugiat"
+ " vivamus at augue eget arcu dictum varius duis at. Varius morbi enim nunc faucibus"
+ " a pellentesque sit. Id aliquet lectus proin nibh nisl condimentum id venenatis a."
+ " Tortor dignissim convallis aenean et tortor at risus viverra adipiscing. Aliquam "
+ "malesuada bibendum arcu vitae elementum curabitur vitae nunc sed. Habitasse platea"
+ " dictumst quisque sagittis purus sit amet volutpat. Vitae auctor eu augue ut "
+ "lectus. At varius vel pharetra vel turpis nunc eget. Dictum at tempor commodo "
+ "ullamcorper a lacus vestibulum sed arcu. Pellentesque massa placerat duis "
+ "ultricies. Enim nunc faucibus a pellentesque sit amet porttitor eget dolor. "
+ "Volutpat blandit aliquam etiam erat velit scelerisque in. Amet mattis vulputate "
+ "enim nulla aliquet porttitor. Egestas maecenas pharetra convallis posuere morbi "
+ "leo urna molestie. Duis ut diam quam nulla porttitor massa id. In fermentum "
+ "posuere urna nec tincidunt praesent. Turpis egestas sed tempus urna et pharetra "
+ "pharetra massa. Tellus molestie nunc non blandit massa. Diam phasellus vestibulum "
+ "lorem sed risus ultricies. Egestas erat imperdiet sed euismod nisi porta lorem. "
+ "Quam viverra orci sagittis eu volutpat odio facilisis mauris sit. Ornare aenean "
+ "euismod elementum nisi quis. Laoreet non curabitur gravida arcu ac tortor "
+ "dignissim convallis aenean. Sagittis aliquam malesuada bibendum arcu vitae "
+ "elementum. Sed blandit libero volutpat sed cras ornare. Sagittis eu volutpat odio "
+ "facilisis mauris. Facilisis volutpat est velit egestas dui id ornare arcu odio. "
+ "Eu feugiat pretium nibh."
+)
+
+
+class TestScraper:
+ """Tests of the argument handling done when constructing a Scraper."""
+
+ def create_scraper(
+ self,
+ *,
+ do_fetch: bool = True,
+ do_prebuild: bool = True,
+ do_build: bool = True,
+ zimui_dist_dir: str = str(ZIMUI_DIST_PATH),
+ output_dir: str = str(OUTPUT_PATH),
+ build_dir: str = str(BUILD_PATH),
+ language: str = "eng",
+ name="fcc_en_javascript",
+ title="freeCodeCamp Javascript",
+ description="FCC Javascript Courses",
+ long_description: str | None = None,
+ content_creator: str = "freeCodeCamp",
+ publisher="openZIM",
+ zim_file: str | None = None,
+ force: bool = False,
+ course_csv="regular-expressions,basic-javascript",
+ zip_path: str | None = None,
+ start_date: datetime.date = DEFAULT_START_DATE,
+ ):
+ """Build a Scraper from defaults, so each test only overrides what it checks."""
+ return Scraper(
+ do_fetch=do_fetch,
+ do_prebuild=do_prebuild,
+ do_build=do_build,
+ zimui_dist_dir=zimui_dist_dir,
+ output_dir=output_dir,
+ build_dir=build_dir,
+ language=language,
+ name=name,
+ title=title,
+ description=description,
+ long_description=long_description,
+ content_creator=content_creator,
+ publisher=publisher,
+ zim_file=zim_file,
+ force=force,
+ course_csv=course_csv,
+ zip_path=zip_path,
+ start_date=start_date,
+ )
+
+ def test_init_ok(self):
+ # constructing the scraper is expected to create the output and build directories;
+ # the leading asserts require that no earlier test created them
+ assert not OUTPUT_PATH.exists()
+ assert not BUILD_PATH.exists()
+ self.create_scraper()
+ assert OUTPUT_PATH.exists()
+ assert BUILD_PATH.exists()
+
+ # each id spells the requested (fetch, prebuild, build) flags; requesting no phase
+ # at all (FFF) is expected to enable all of them
+ @pytest.mark.parametrize(
+ "do_fetch, do_prebuild, do_build, expected_do_fetch, expected_do_prebuild,"
+ "expected_do_build",
+ [
+ pytest.param(False, False, False, True, True, True, id="FFF"),
+ pytest.param(True, False, False, True, False, False, id="TFF"),
+ pytest.param(False, True, False, False, True, False, id="FTF"),
+ pytest.param(True, True, False, True, True, False, id="TTF"),
+ pytest.param(False, False, True, False, False, True, id="FFT"),
+ pytest.param(True, False, True, True, False, True, id="TFT"),
+ pytest.param(False, True, True, False, True, True, id="FTT"),
+ pytest.param(True, True, True, True, True, True, id="TTT"),
+ ],
+ )
+ def test_do_phases_ok(
+ self,
+ *,
+ do_fetch: bool,
+ do_prebuild: bool,
+ do_build: bool,
+ expected_do_fetch: bool,
+ expected_do_prebuild: bool,
+ expected_do_build: bool,
+ ):
+ scraper = self.create_scraper(
+ do_fetch=do_fetch, do_prebuild=do_prebuild, do_build=do_build
+ )
+ assert scraper.do_fetch == expected_do_fetch
+ assert scraper.do_prebuild == expected_do_prebuild
+ assert scraper.do_build == expected_do_build
+
+ def test_zimui_dist_dir_ko(self):
+ # a zimui dist directory which does not exist must be rejected
+ with pytest.raises(ValueError):
+ self.create_scraper(zimui_dist_dir="whatever")
+
+ # NOTE(review): the eng/english case is listed three times with the same id, which
+ # looks like redundant entries - confirm and de-duplicate.
+ # NOTE(review): "ukranian" must match the value stored in FCC_LANG_MAP; the spelling
+ # looks unusual - confirm against the upstream curriculum directory name.
+ @pytest.mark.parametrize(
+ "language, expected_fcc_lang",
+ [
+ pytest.param("eng", "english", id="english"),
+ pytest.param("eng", "english", id="english"),
+ pytest.param("ara", "arabic", id="arabic"),
+ pytest.param("cmn", "chinese", id="chinese"),
+ pytest.param("lzh", "chinese-traditional", id="chinese-traditional"),
+ pytest.param("eng", "english", id="english"),
+ pytest.param("spa", "espanol", id="espanol"),
+ pytest.param("deu", "german", id="german"),
+ pytest.param("ita", "italian", id="italian"),
+ pytest.param("jpn", "japanese", id="japanese"),
+ pytest.param("por", "portuguese", id="portuguese"),
+ pytest.param("ukr", "ukranian", id="ukranian"),
+ ],
+ )
+ def test_fcc_lang_ok(self, language: str, expected_fcc_lang: str):
+ scraper = self.create_scraper(language=language)
+ assert scraper.language == language
+ assert scraper.fcc_lang == expected_fcc_lang
+
+ def test_language_ko(self):
+ with pytest.raises(ValueError):
+ self.create_scraper(language="whatever")
+
+ def test_description_ko(self):
+ # 81 characters: presumably one more than the maximum description length - confirm
+ with pytest.raises(ValueError):
+ self.create_scraper(description=LONG_TEXT[:81])
+
+ def test_long_description_ko(self):
+ # 4001 characters: presumably one more than the maximum long description length -
+ # confirm
+ with pytest.raises(ValueError):
+ self.create_scraper(long_description=LONG_TEXT[:4001])
+
+ def test_title_ko(self):
+ # NOTE(review): the Scraper itself does not check the title length; the error is
+ # presumably raised while configuring the ZIM metadata - confirm
+ with pytest.raises(ValueError):
+ self.create_scraper(title=LONG_TEXT[:31])
+
+ def test_zip_path_ok(self):
+ # an existing file must be accepted as zip_path
+ with NamedTemporaryFile(dir=WORKING_DIR_PATH) as tmp:
+ zip_path = tmp.name
+ self.create_scraper(zip_path=zip_path)
+
+ def test_zip_path_ko(self):
+ with pytest.raises(ValueError):
+ self.create_scraper(zip_path="whatever")
+
+ @pytest.mark.parametrize(
+ "name, start_date",
+ [
+ pytest.param("something", "2023-08-23", id="case1"),
+ pytest.param("name2", "2023-08-24", id="case2"),
+ ],
+ )
+ def test_zim_file_default(self, name, start_date):
+ # without zim_file, the ZIM is expected at <output>/<name>_<YYYY-MM>.zim
+ scraper = self.create_scraper(
+ name=name, start_date=datetime.date.fromisoformat(start_date)
+ )
+ assert scraper.zim_path == OUTPUT_PATH.joinpath(f"{name}_{start_date[:7]}.zim")
+
+ def test_zim_file_is_path_ko(self):
+ # zim_file must be a bare filename, not a path
+ with pytest.raises(ValueError):
+ self.create_scraper(zim_file=str(OUTPUT_PATH.joinpath("whatever.zim")))
+
+ def test_zim_file_ok(self):
+ self.create_scraper(zim_file="whatever.zim")
+
+ def test_zim_file_exists_ko(self):
+ # an already existing ZIM file must be refused when force is not set
+ with NamedTemporaryFile(dir=OUTPUT_PATH, suffix=".zim") as tmp:
+ zim_file = Path(tmp.name).name
+ with pytest.raises(ValueError):
+ self.create_scraper(zim_file=zim_file)
+
+ def test_zim_file_exists_force(self):
+ # delete=False: with force set, the scraper itself is expected to remove the file
+ with NamedTemporaryFile(dir=OUTPUT_PATH, suffix=".zim", delete=False) as tmp:
+ zim_file = Path(tmp.name).name
+ self.create_scraper(zim_file=zim_file, force=True)
+ assert not Path(tmp.name).exists()
diff --git a/client/.dockerignore b/zimui/.dockerignore
similarity index 100%
rename from client/.dockerignore
rename to zimui/.dockerignore
diff --git a/zimui/.eslintignore b/zimui/.eslintignore
new file mode 100644
index 0000000..e69de29
diff --git a/client/.eslintrc.cjs b/zimui/.eslintrc.cjs
similarity index 100%
rename from client/.eslintrc.cjs
rename to zimui/.eslintrc.cjs
diff --git a/client/.gitignore b/zimui/.gitignore
similarity index 100%
rename from client/.gitignore
rename to zimui/.gitignore
diff --git a/client/.prettierignore b/zimui/.prettierignore
similarity index 100%
rename from client/.prettierignore
rename to zimui/.prettierignore
diff --git a/client/.prettierrc.json b/zimui/.prettierrc.json
similarity index 100%
rename from client/.prettierrc.json
rename to zimui/.prettierrc.json
diff --git a/client/index.html b/zimui/index.html
similarity index 100%
rename from client/index.html
rename to zimui/index.html
diff --git a/client/package.json b/zimui/package.json
similarity index 100%
rename from client/package.json
rename to zimui/package.json
diff --git a/client/postcss.config.js b/zimui/postcss.config.js
similarity index 100%
rename from client/postcss.config.js
rename to zimui/postcss.config.js
diff --git a/client/public/free-code-camp-logo.svg b/zimui/public/free-code-camp-logo.svg
similarity index 100%
rename from client/public/free-code-camp-logo.svg
rename to zimui/public/free-code-camp-logo.svg
diff --git a/client/public/vite.svg b/zimui/public/vite.svg
similarity index 100%
rename from client/public/vite.svg
rename to zimui/public/vite.svg
diff --git a/client/src/App.vue b/zimui/src/App.vue
similarity index 100%
rename from client/src/App.vue
rename to zimui/src/App.vue
diff --git a/client/src/assets/vue.svg b/zimui/src/assets/vue.svg
similarity index 100%
rename from client/src/assets/vue.svg
rename to zimui/src/assets/vue.svg
diff --git a/client/src/components/challenge/ChallengeInstructions.vue b/zimui/src/components/challenge/ChallengeInstructions.vue
similarity index 100%
rename from client/src/components/challenge/ChallengeInstructions.vue
rename to zimui/src/components/challenge/ChallengeInstructions.vue
diff --git a/client/src/components/challenge/ChallengeRunner.vue b/zimui/src/components/challenge/ChallengeRunner.vue
similarity index 100%
rename from client/src/components/challenge/ChallengeRunner.vue
rename to zimui/src/components/challenge/ChallengeRunner.vue
diff --git a/client/src/components/challenge/CodeEditor.vue b/zimui/src/components/challenge/CodeEditor.vue
similarity index 100%
rename from client/src/components/challenge/CodeEditor.vue
rename to zimui/src/components/challenge/CodeEditor.vue
diff --git a/client/src/components/challenge/ConsoleLogger.vue b/zimui/src/components/challenge/ConsoleLogger.vue
similarity index 100%
rename from client/src/components/challenge/ConsoleLogger.vue
rename to zimui/src/components/challenge/ConsoleLogger.vue
diff --git a/client/src/main.ts b/zimui/src/main.ts
similarity index 100%
rename from client/src/main.ts
rename to zimui/src/main.ts
diff --git a/client/src/pages/ChallengePage.vue b/zimui/src/pages/ChallengePage.vue
similarity index 100%
rename from client/src/pages/ChallengePage.vue
rename to zimui/src/pages/ChallengePage.vue
diff --git a/client/src/pages/ChallengesPage.vue b/zimui/src/pages/ChallengesPage.vue
similarity index 100%
rename from client/src/pages/ChallengesPage.vue
rename to zimui/src/pages/ChallengesPage.vue
diff --git a/client/src/pages/HomePage.vue b/zimui/src/pages/HomePage.vue
similarity index 100%
rename from client/src/pages/HomePage.vue
rename to zimui/src/pages/HomePage.vue
diff --git a/client/src/routes.ts b/zimui/src/routes.ts
similarity index 100%
rename from client/src/routes.ts
rename to zimui/src/routes.ts
diff --git a/client/src/style.css b/zimui/src/style.css
similarity index 100%
rename from client/src/style.css
rename to zimui/src/style.css
diff --git a/client/src/utils/__tests__/fixtures/basicMarkdownChallenge.md b/zimui/src/utils/__tests__/fixtures/basicMarkdownChallenge.md
similarity index 100%
rename from client/src/utils/__tests__/fixtures/basicMarkdownChallenge.md
rename to zimui/src/utils/__tests__/fixtures/basicMarkdownChallenge.md
diff --git a/client/src/utils/__tests__/fixtures/sampleJSChallenge.md b/zimui/src/utils/__tests__/fixtures/sampleJSChallenge.md
similarity index 100%
rename from client/src/utils/__tests__/fixtures/sampleJSChallenge.md
rename to zimui/src/utils/__tests__/fixtures/sampleJSChallenge.md
diff --git a/client/src/utils/__tests__/parseChallenge.test.ts b/zimui/src/utils/__tests__/parseChallenge.test.ts
similarity index 100%
rename from client/src/utils/__tests__/parseChallenge.test.ts
rename to zimui/src/utils/__tests__/parseChallenge.test.ts
diff --git a/client/src/utils/__tests__/runChallenge.test.ts b/zimui/src/utils/__tests__/runChallenge.test.ts
similarity index 100%
rename from client/src/utils/__tests__/runChallenge.test.ts
rename to zimui/src/utils/__tests__/runChallenge.test.ts
diff --git a/client/src/utils/assert.ts b/zimui/src/utils/assert.ts
similarity index 100%
rename from client/src/utils/assert.ts
rename to zimui/src/utils/assert.ts
diff --git a/client/src/utils/helpers.ts b/zimui/src/utils/helpers.ts
similarity index 100%
rename from client/src/utils/helpers.ts
rename to zimui/src/utils/helpers.ts
diff --git a/client/src/utils/parseChallenge.ts b/zimui/src/utils/parseChallenge.ts
similarity index 100%
rename from client/src/utils/parseChallenge.ts
rename to zimui/src/utils/parseChallenge.ts
diff --git a/client/src/utils/runChallenge.ts b/zimui/src/utils/runChallenge.ts
similarity index 100%
rename from client/src/utils/runChallenge.ts
rename to zimui/src/utils/runChallenge.ts
diff --git a/client/src/utils/titleize.ts b/zimui/src/utils/titleize.ts
similarity index 100%
rename from client/src/utils/titleize.ts
rename to zimui/src/utils/titleize.ts
diff --git a/client/src/vite-env.d.ts b/zimui/src/vite-env.d.ts
similarity index 100%
rename from client/src/vite-env.d.ts
rename to zimui/src/vite-env.d.ts
diff --git a/client/tailwind.config.js b/zimui/tailwind.config.js
similarity index 100%
rename from client/tailwind.config.js
rename to zimui/tailwind.config.js
diff --git a/client/tsconfig.json b/zimui/tsconfig.json
similarity index 100%
rename from client/tsconfig.json
rename to zimui/tsconfig.json
diff --git a/client/tsconfig.node.json b/zimui/tsconfig.node.json
similarity index 100%
rename from client/tsconfig.node.json
rename to zimui/tsconfig.node.json
diff --git a/client/vite.config.ts b/zimui/vite.config.ts
similarity index 100%
rename from client/vite.config.ts
rename to zimui/vite.config.ts
diff --git a/client/yarn.lock b/zimui/yarn.lock
similarity index 100%
rename from client/yarn.lock
rename to zimui/yarn.lock