|
1 |
| -FROM docker.io/ocrd/core:v2.67.2 AS base |
2 |
| -# set proper locales |
3 |
| -ENV LANG C.UTF-8 |
4 |
| -ENV LC_ALL C.UTF-8 |
| 1 | +ARG DOCKER_BASE_IMAGE |
| 2 | +FROM $DOCKER_BASE_IMAGE |
5 | 3 | # install ocrd-tesserocr (until here commands for installing tesseract-ocr)
|
6 | 4 | ARG VCS_REF
|
7 | 5 | ARG BUILD_DATE
|
8 | 6 | LABEL \
|
9 | 7 | maintainer="https://ocr-d.de/kontakt" \
|
10 | 8 | org.label-schema.vcs-ref=$VCS_REF \
|
11 | 9 | org.label-schema.vcs-url="https://github.com/OCR-D/ocrd_tesserocr" \
|
12 |
| - org.label-schema.build-date=$BUILD_DATE |
| 10 | + org.label-schema.build-date=$BUILD_DATE \ |
| 11 | + org.opencontainers.image.vendor="DFG-Funded Initiative for Optical Character Recognition Development" \ |
| 12 | + org.opencontainers.image.title="ocrd_tesserocr" \ |
| 13 | + org.opencontainers.image.description="Tesseract OCR bindings" \ |
| 14 | + org.opencontainers.image.source="https://github.com/OCR-D/ocrd_tesserocr" \ |
| 15 | + org.opencontainers.image.documentation="https://github.com/OCR-D/ocrd_tesserocr/blob/${VCS_REF}/README.md" \ |
| 16 | + org.opencontainers.image.revision=$VCS_REF \ |
| 17 | + org.opencontainers.image.created=$BUILD_DATE \ |
| 18 | + org.opencontainers.image.base.name=ocrd/core |
13 | 19 |
|
14 |
| -ENV PYTHONIOENCODING utf8 |
15 | 20 |
|
16 | 21 | # set frontend non-interactive to silence interactive tzdata config
|
17 |
| -ARG DEBIAN_FRONTEND=noninteractive |
| 22 | +ENV DEBIAN_FRONTEND noninteractive |
| 23 | +# set proper locales |
| 24 | +ENV PYTHONIOENCODING utf8 |
| 25 | +ENV LANG C.UTF-8 |
| 26 | +ENV LC_ALL C.UTF-8 |
18 | 27 |
|
19 | 28 | # set proper date and timezone in container
|
20 | 29 | RUN echo "Europe/Berlin" > /etc/timezone
|
21 | 30 | RUN ln -sf /usr/share/zoneinfo/Europe/Berlin /etc/localtime
|
22 | 31 | RUN dpkg-reconfigure -f noninteractive tzdata
|
23 |
| - |
24 | 32 | # diagnostic output - check timezone settings
|
25 | 33 | # RUN cat /etc/timezone
|
26 | 34 |
|
27 | 35 | # avoid HOME/.local/share (hard to predict USER here)
|
28 | 36 | # so let XDG_DATA_HOME coincide with fixed system location
|
29 | 37 | # (can still be overridden by derived stages)
|
30 | 38 | ENV XDG_DATA_HOME /usr/local/share
|
| 39 | +# avoid the need for an extra volume for persistent resource user db |
| 40 | +# (i.e. XDG_CONFIG_HOME/ocrd/resources.yml) |
31 | 41 | ENV XDG_CONFIG_HOME /usr/local/share/ocrd-resources
|
32 | 42 | ENV TESSDATA_PREFIX $XDG_DATA_HOME/tessdata
|
33 | 43 |
|
34 | 44 | WORKDIR /build/ocrd_tesserocr
|
35 |
| -COPY setup.py . |
36 |
| -COPY ocrd_tesserocr/ocrd-tool.json . |
37 |
| -COPY README.md . |
38 |
| -COPY requirements.txt . |
39 |
| -COPY requirements_test.txt . |
40 |
| -COPY .git .git |
41 |
| -COPY .gitmodules . |
42 |
| -COPY ocrd_tesserocr ocrd_tesserocr |
43 |
| -COPY repo/tesserocr repo/tesserocr |
44 |
| -COPY repo/tesseract repo/tesseract |
45 |
| -COPY Makefile . |
| 45 | +COPY . . |
| 46 | +# prepackage ocrd-tool.json as ocrd-all-tool.json |
| 47 | +RUN ocrd ocrd-tool ocrd_tesserocr/ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename))/ocrd-all-tool.json |
| 48 | +# install everything and reduce image size |
46 | 49 | RUN make deps-ubuntu \
|
47 |
| - && make -j4 install-tesseract \ |
48 |
| - && make -j4 install-tesseract-training \ |
49 |
| - && make deps install \ |
| 50 | + && make -j4 install GIT_SUBMODULE=: \ |
| 51 | + && make -j4 install-tesseract-training GIT_SUBMODULE=: \ |
50 | 52 | && rm -rf /build/ocrd_tesserocr \
|
51 | 53 | && apt-get -y remove --auto-remove g++ libtesseract-dev make
|
52 | 54 |
|
|
0 commit comments