Skip to content

Commit

Permalink
All doc conversion to HTML (#2171)
Browse files Browse the repository at this point in the history
  • Loading branch information
flodolo authored Feb 7, 2025
1 parent caf15c0 commit 6739b36
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 73 deletions.
76 changes: 76 additions & 0 deletions .github/scripts/convert_documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#! /usr/bin/env python3

import io
import json
import os
import markdown as md
from weasyprint import HTML
from pathlib import Path
from functions import findAllFiles


def convertMdToHTML(file_path):
    """Render the Markdown file at *file_path* to an HTML string.

    The file is parsed with the ``attr_list``, ``toc`` and ``tables``
    Markdown extensions enabled.

    Returns the rendered HTML decoded as UTF-8, or ``None`` if an
    ``OSError`` is raised while converting the file.
    """
    enabled_extensions = [
        "markdown.extensions.attr_list",
        "markdown.extensions.toc",
        "tables",
    ]

    # The converter writes bytes, so collect them in an in-memory buffer that
    # is closed automatically on every exit path.
    with io.BytesIO() as buffer:
        try:
            # Parse the Markdown file
            md.markdownFromFile(
                input=str(file_path),
                output=buffer,
                extensions=enabled_extensions,
            )
            return buffer.getvalue().decode("utf-8")
        except OSError:
            return None


def main():
    """Convert the configured Markdown documents to HTML and/or PDF.

    Reads ``convert_sources.json`` (located next to this script) for the
    ``"html"`` and ``"pdf"`` lists of file names, then iterates over the
    ``locale -> file names`` mapping returned by ``findAllFiles`` for the
    repository root (two levels above this script). Every listed file is
    rendered to HTML once; that HTML is then saved under ``<locale>/html/``
    and/or rendered to PDF under ``<locale>/pdf/``, depending on which list
    names the file.

    Files that cannot be converted to HTML are reported and skipped.
    """
    script_path = os.path.dirname(__file__)
    root_path = os.path.abspath(os.path.join(script_path, os.pardir, os.pardir))

    # Get list of documents to convert
    config_path = os.path.join(script_path, "convert_sources.json")
    with open(config_path, encoding="utf-8") as config_file:
        json_data = json.load(config_file)
    # Sets give O(1) membership tests inside the loops below.
    pdf_files = set(json_data["pdf"])
    html_files = set(json_data["html"])
    files_to_convert = pdf_files | html_files

    files_list = findAllFiles(root_path)

    for locale, filenames in files_list.items():
        locale_folder = os.path.join(root_path, locale)
        for filename in filenames:
            if filename not in files_to_convert:
                continue

            source_file = os.path.join(locale_folder, filename)

            # Convert Markdown to HTML
            html_content = convertMdToHTML(source_file)
            if html_content is None:
                # convertMdToHTML returns None on OSError; skip the file
                # instead of failing later with an unrelated TypeError.
                print(f"Skipping {source_file}: could not be converted to HTML")
                continue

            # Remove only a literal ".md" suffix. str.rstrip(".md") would
            # strip any trailing '.', 'm' or 'd' characters and mangle names
            # such as "fxa_dm.md".
            if filename.endswith(".md"):
                base_name = filename[: -len(".md")]
            else:
                base_name = filename

            # Save HTML file if requested, creating `html` folder if missing
            if filename in html_files:
                html_folder = os.path.join(locale_folder, "html")
                Path(html_folder).mkdir(exist_ok=True)
                html_dest_file = os.path.join(html_folder, f"{base_name}.html")
                # Use a dedicated handle name: rebinding the loop variable
                # here would break the PDF membership check below.
                with open(html_dest_file, "w", encoding="utf-8") as html_file:
                    html_file.write(html_content)

            # Save PDF file if requested, creating `pdf` folder if missing
            if filename in pdf_files:
                pdf_folder = os.path.join(locale_folder, "pdf")
                Path(pdf_folder).mkdir(exist_ok=True)
                pdf_dest_file = os.path.join(pdf_folder, f"{base_name}.pdf")
                # Convert HTML to PDF
                HTML(string=html_content).write_pdf(pdf_dest_file)


# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"files": [
"html": [],
"pdf": [
"firefox_relay_privacy_notice.md",
"firefox_relay_tos.md",
"hubs_privacy_notice.md",
Expand Down
60 changes: 0 additions & 60 deletions .github/scripts/convert_to_pdf.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: PDF generation
name: Convert documents to PDF/HTML

on:
push:
Expand All @@ -7,7 +7,7 @@ on:
paths:
- '**.md'
- '**.json'
- '.github/workflows/pdf_generation.yml'
- '.github/workflows/doc_generation.yml'
- '.github/requirements.txt'
# Ignore Markdown files in the root
- '!*.md'
Expand All @@ -16,7 +16,7 @@ on:

jobs:
fetch:
name: Generate PDF
name: Generate documents
runs-on: ubuntu-latest
steps:
- name: Install Linux packages
Expand All @@ -41,36 +41,36 @@ jobs:
# Set arbitrary EPOCH date to get reproducible PDF generation.
# https://github.com/Kozea/WeasyPrint/issues/1553
export SOURCE_DATE_EPOCH=1658210455
python .github/scripts/convert_to_pdf.py
python .github/scripts/convert_documents.py
- name: Set up git credentials
run : |
git config --global user.email '[email protected]'
git config --global user.name 'MozMEAO Bot'
- name: Commit changes to pdf_updates branch
- name: Commit changes to doc_updates branch
continue-on-error: true
run: |
# Commit changes. Failure is allowed if there is nothing to commit.
git checkout -B pdf_updates
git checkout -B doc_updates
git add .
git commit -m "Convert documents to PDF"
git push -f origin pdf_updates
git commit -m "Convert documents"
git push -f origin doc_updates
- name: Open pull request
continue-on-error: true
run: |
# continue-on-error is needed in case there is already an open PR
changes=$(git diff --name-only main pdf_updates -- | wc -l | awk '{print $1}')
changes=$(git diff --name-only main doc_updates -- | wc -l | awk '{print $1}')
if [[ "$changes" = "0" ]];
then
echo "No changes."
else
# Create pull request
gh pr create \
--title "[pdf] Update documents in PDF format" \
--body "Convert documents to PDF" \
--head pdf_updates \
--title "[pdf] Update converted documents" \
--body "Convert documents to PDF/HTML" \
--head doc_updates \
--base main \
--label l10n \
--reviewer @mozilla/legal-l10n
Expand Down

0 comments on commit 6739b36

Please sign in to comment.