Skip to content

Commit 947bdc1

Browse files
authored
Add more samples - especially one book (#7)
1 parent b990d73 commit 947bdc1

21 files changed

+120
-8
lines changed

.github/workflows/ci.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
pydantic
2+
PyPDF2

.github/workflows/ci.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
# This file is autogenerated by pip-compile with python 3.10
33
# To update, run:
44
#
5-
# pip-compile ci.in
5+
# pip-compile .github/workflows/ci.in
66
#
77
pydantic==1.9.0
8-
# via -r ci.in
8+
# via -r .github/workflows/ci.in
9+
pypdf2==1.27.9
10+
# via -r .github/workflows/ci.in
911
typing-extensions==4.1.1
1012
# via pydantic

.github/workflows/file-size-check.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ jobs:
2525
uses: tj-actions/changed-files@<version>
2626
- name: List all changed files
2727
run: |
28-
# MAXSIZE is 1 MB
29-
MAXSIZE=1000000
28+
# MAXSIZE is 10 MB
29+
MAXSIZE=10000000
3030
for FILENAME in ${{ steps.changed-files.outputs.all_changed_files }}; do
3131
FILESIZE=$(stat -c%s "$FILENAME")
3232
echo "Size of $FILENAME = $FILESIZE bytes."

.github/workflows/json_consistency.py

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,22 @@
33
import os
44
import sys
55
from pathlib import Path
6-
from typing import List
6+
from typing import List, Optional
77

88
from pydantic import BaseModel, NonNegativeInt
9+
from PyPDF2 import PdfFileReader
910

1011

1112
class PdfEntry(BaseModel):
1213
path: str
1314
producer: str
1415
pages: NonNegativeInt
1516
creation_date: datetime.datetime
16-
images: NonNegativeInt
17+
images: Optional[NonNegativeInt]
1718

1819

1920
class MainPdfFile(BaseModel):
20-
data: list[PdfEntry]
21+
data: List[PdfEntry]
2122

2223

2324
def main():
@@ -34,6 +35,7 @@ def main():
3435
seen_failure = True
3536
else:
3637
print(f"✅ Found {entry.path}")
38+
check_meta(entry)
3739

3840
# Are all files registered?
3941
pdf_paths = Path(".").glob("**/*.pdf")
@@ -46,5 +48,47 @@ def main():
4648
sys.exit(1)
4749

4850

51+
def pdf_to_datetime(date_str):
    """Convert a PDF date string into a naive ``datetime.datetime``.

    The expected layout is ``D:YYYYMMDDHHmmSS``. Anything that follows the
    seconds field (for example a timezone suffix) is ignored.

    Args:
        date_str: Raw date value, e.g. ``"D:20220403120510+02'00'"``.

    Returns:
        The parsed timestamp, without timezone information.

    Raises:
        ValueError: If ``date_str`` is not a string, lacks the ``D:`` prefix,
            has fewer than 14 digits after the prefix, or holds field values
            that do not form a valid date/time.
    """
    # Reject bad input up front instead of printing and carrying on: slicing
    # off two characters from a value without the "D:" prefix could otherwise
    # yield a plausible-looking but wrong timestamp.
    if not isinstance(date_str, str) or not date_str.startswith("D:"):
        print(f"❌ ERROR: Invalid date: {date_str}")
        raise ValueError(f"Invalid PDF date (missing 'D:' prefix): {date_str!r}")

    digits = date_str[2:]
    if len(digits) < 14 or not digits[:14].isdigit():
        # Report the value as it was received, not the truncated remainder.
        print(f"❌ ERROR: Invalid date: {date_str}")
        raise ValueError(f"Invalid PDF date (need 14 digits): {date_str!r}")

    return datetime.datetime(
        int(digits[0:4]),  # year
        int(digits[4:6]),  # month
        int(digits[6:8]),  # day
        int(digits[8:10]),  # hour
        int(digits[10:12]),  # minute
        int(digits[12:14]),  # second
    )
65+
66+
67+
def check_meta(entry: PdfEntry):
    """Compare the metadata stored in a PDF with its registered entry.

    Opens ``entry.path`` with ``PdfFileReader`` and checks the document's
    ``/Producer`` and ``/CreationDate`` against ``entry.producer`` and
    ``entry.creation_date``. Every discrepancy is reported as an
    ``❌ ERROR`` line on stdout. Encrypted files are skipped silently.

    Args:
        entry: The registered description of the PDF to verify.
    """
    reader = PdfFileReader(entry.path)
    if reader.isEncrypted:
        # Skip encrypted files without reporting anything.
        return

    info = reader.getDocumentInfo()
    if info is None:
        # No document information available, so nothing can be compared.
        print(f"❌ ERROR: No document info found in {entry.path}")
        return

    producer = info.get("/Producer")
    if producer != entry.producer:
        print(
            f"❌ ERROR: Producer mismatch: {entry.producer} vs {producer}"
        )

    # Compare at second resolution: the entry side is cut to
    # "YYYY-MM-DDTHH:MM:SS" (19 characters).
    entry_date = entry.creation_date.isoformat()[:19]
    raw_date = info.get("/CreationDate")
    if raw_date is None:
        print(
            f"❌ ERROR: Creation date mismatch: {entry_date} vs {raw_date}"
        )
    else:
        try:
            pdf_date = pdf_to_datetime(raw_date).isoformat()
        except ValueError:
            # A malformed date in one file should not abort the whole run.
            print(
                f"❌ ERROR: Unparsable creation date in {entry.path}: {raw_date}"
            )
        else:
            if pdf_date != entry_date:
                print(
                    f"❌ ERROR: Creation date mismatch: {entry_date} vs {pdf_date}"
                )

    # TODO: image-count verification is disabled; the draft below is kept
    # for reference.
    # if entry.images is not None:
    #     if info.get("/XObject") is None:
    #         if entry.images > 0:
    #             print(f"❌ ERROR: No XObject, but {entry.images} expected")
    #     elif len(info.get("/XObject")) != entry.images:
    #         print(
    #             f"❌ ERROR: XObject count mismatch: {entry.images} vs {len(info.get('/XObject'))}"
    #         )
91+
92+
4993
if __name__ == "__main__":
5094
main()

007-imagemagick-images/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Generated via [imagemagick](https://legacy.imagemagick.org/Usage/formats/#ps):
2+
3+
```bash
4+
convert -quality 100 * imagemagick-images.pdf
5+
convert -compress none smile.png imagemagick-ASCII85Decode.pdf
6+
convert -compress lzw smile.png imagemagick-lzw.pdf
7+
convert -alpha off -monochrome -compress fax smile.png imagemagick-CCITTFaxDecode.pdf
8+
```
Binary file not shown.
Binary file not shown.
15.6 KB
Binary file not shown.
2.62 KB
Binary file not shown.
193 KB
Binary file not shown.

0 commit comments

Comments
 (0)