Spaces:
Sleeping
Sleeping
| ## Grobid evaluation image | |
| # ------ | |
| # https://grobid.readthedocs.io/en/latest/End-to-end-evaluation/ | |
| # NOTE: To match the exact evaluation published in the Grobid documentation is necessary to have a | |
| # running Biblio-glutton instance | |
| # | |
| # A project using this image can be found here: https://huggingface.co/spaces/lfoppiano/grobid-evaluation | |
| # Please notice that the evaluation is run through a python script that runs all the needed commands | |
| # TODO: upload the evaluation in Markdown somewhere | |
| # ------------------- | |
| # build builder image | |
| # ------------------- | |
| FROM eclipse-temurin:17.0.15_6-jdk AS builder | |
| USER root | |
| RUN apt-get update && \ | |
| apt-get -y upgrade && \ | |
| apt-get -y --no-install-recommends install unzip git | |
| WORKDIR /opt/grobid | |
| # gradle | |
| COPY gradle/ ./gradle/ | |
| COPY gradlew ./ | |
| COPY gradle.properties ./ | |
| COPY build.gradle ./ | |
| COPY settings.gradle ./ | |
| COPY .git/ ./.git | |
| # source | |
| COPY grobid-home/ ./grobid-home/ | |
| COPY grobid-core/ ./grobid-core/ | |
| COPY grobid-service/ ./grobid-service/ | |
| COPY grobid-trainer/ ./grobid-trainer/ | |
| # cleaning unused native libraries before packaging | |
| RUN rm -rf grobid-home/pdf2xml | |
| RUN rm -rf grobid-home/pdfalto/lin-32 | |
| RUN rm -rf grobid-home/pdfalto/mac-64 | |
| RUN rm -rf grobid-home/pdfalto/mac_arm-64 | |
| RUN rm -rf grobid-home/pdfalto/win-* | |
| RUN rm -rf grobid-home/lib/lin-32 | |
| RUN rm -rf grobid-home/lib/win-* | |
| RUN rm -rf grobid-home/lib/mac-64 | |
| # Setting DL-powered configuration | |
| RUN rm grobid-home/config/grobid.yaml && \ | |
| mv grobid-home/config/grobid-evaluation.yaml grobid-home/config/grobid.yaml | |
| # Download evaluation data (For space reasons, we are not downloading the evaluation data) \ | |
| # See https://huggingface.co/spaces/lfoppiano/grobid-evaluation/blob/main/Dockerfile | |
| WORKDIR /opt/grobid/evaluation | |
| #RUN git lfs install && git clone --depth 1 https://huggingface.co/datasets/sciencialab/grobid-evaluation evaluation | |
| #RUN chmod -R uog+rw /opt/grobid/evaluation | |
| # ------------------- | |
| # build runtime image | |
| # ------------------- | |
| # use NVIDIA Container Toolkit to automatically recognize possible GPU drivers on the host machine | |
| FROM tensorflow/tensorflow:2.7.0-gpu | |
| # setting locale is likely useless but to be sure | |
| ENV LANG C.UTF-8 | |
| # update NVIDIA Cuda key (following a key rotation in April 2022) | |
| RUN apt-get install -y wget && \ | |
| apt-key del 7fa2af80 && \ | |
| rm /etc/apt/sources.list.d/cuda.list && \ | |
| rm /etc/apt/sources.list.d/nvidia-ml.list && \ | |
| wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb && \ | |
| dpkg -i cuda-keyring_1.1-1_all.deb | |
| # install python and other dependencies | |
| RUN apt-get update && \ | |
| apt-mark hold libcudnn8 && \ | |
| apt-get -y upgrade && \ | |
| apt-get -y --no-install-recommends install \ | |
| bash apt-utils build-essential gcc libxml2 libfontconfig unzip curl \ | |
| musl gfortran \ | |
| python3 python3-pip python3-setuptools python3-dev \ | |
| && apt-get clean \ | |
| && rm -rf /var/lib/apt/lists/* | |
| WORKDIR /opt/grobid | |
| COPY --from=builder /opt/grobid . | |
| RUN python3 -m pip install pip --upgrade --no-cache-dir | |
| # install DeLFT via pypi | |
| RUN pip3 install --no-cache-dir requests delft==0.3.3 | |
| # link the data directory to /data | |
| # the current working directory will most likely be /opt/grobid | |
| RUN mkdir -p /data \ | |
| && ln -s /data /opt/grobid/data \ | |
| && ln -s /data ./data | |
| # disable python warnings (and fix logging) | |
| ENV PYTHONWARNINGS="ignore" | |
| ENV JAVA_HOME=/opt/java/openjdk | |
| ENV PATH=$JAVA_HOME/bin:$PATH | |
| ENV JAVA_OPTS=-Xmx4g | |
| # Get java jdk from builder image | |
| # See tag of builder image in [Dockerfile tag](https://github.com/docker-library/docs/blob/master/eclipse-temurin/README.md#simple-tags) to find JAVA_HOME to copy from builder image. | |
| COPY --from=builder /opt/java/openjdk ${JAVA_HOME} | |
| # install jep | |
| RUN pip3 install jep==4.0.3 | |
| ENV LD_LIBRARY_PATH=/usr/local/lib/python3.8/dist-packages/jep:grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep:${LD_LIBRARY_PATH} | |
| # remove libjep.so because we are providing our own version in the virtual env above | |
| RUN rm /opt/grobid/grobid-home/lib/lin-64/jep/libjep.so | |
| # Add Tini | |
| ENV TINI_VERSION v0.19.0 | |
| ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini | |
| RUN chmod +x /tini | |
| ENTRYPOINT ["/tini", "-s", "--"] | |
| WORKDIR /opt/grobid | |
| # preload embeddings, for GROBID all the RNN models use glove-840B (default for the script), ELMo is currently not loaded | |
| # to be done: mechanism to download GROBID fine-tuned models based on SciBERT if selected (but not good enough for the moment) | |
| COPY --from=builder /opt/grobid/grobid-home/scripts/preload_embeddings.py . | |
| COPY --from=builder /opt/grobid/grobid-home/config/resources-registry.json . | |
| RUN python3 preload_embeddings.py --registry ./resources-registry.json && \ | |
| ln -s /opt/grobid /opt/delft | |
| RUN mkdir delft && \ | |
| cp ./resources-registry.json delft/ | |
| VOLUME ["/opt/grobid/grobid-home/tmp"] | |
| LABEL \ | |
| authors="The contributors" \ | |
| org.label-schema.name="Grobid" \ | |
| org.label-schema.description="Image running the Grobid End 2 end evaluation" \ | |
| org.label-schema.url="https://github.com/kermitt2/Grobid" \ | |
| org.label-schema.version=${GROBID_VERSION} |