#-------------------------------------------------------------------
#----------- D E P R E C A T E D !! ------------------------------
#----------- this is not used anymore and kept temporarily ---------
#----------- for compatibility with older GROBID versions ----------
#-------------------------------------------------------------------
#-------------------------------------------------------------------

#-------------------- resource directories ---------------------
# properties indicating where to find the directories necessary for GROBID
# EACH KEY REFERENCING A PATH HAS TO END WITH ".path"
grobid.temp.path=./tmp

#-------------------- external/native libs ---------------------
# path to the folder containing the native libraries of 3rd parties
grobid.nativelibrary.path=./lib
grobid.3rdparty.pdf2xml.path=./pdf2xml
grobid.3rdparty.pdf2xml.memory.limit.mb=6096
grobid.3rdparty.pdf2xml.timeout.sec=60

#-------------------- consolidation --------------------
# Define the bibliographical data consolidation service to be used, either "crossref"
# for the CrossRef REST API or "glutton" for https://github.com/kermitt2/biblio-glutton
grobid.consolidation.service=crossref
#grobid.consolidation.service=glutton

#org.grobid.glutton.host=cloud.science-miner.com/glutton
#org.grobid.glutton.port=0
org.grobid.glutton.host=localhost
org.grobid.glutton.port=8080

#org.grobid.crossref.mailto=toto@titi.tutu
#org.grobid.crossref.token=yourmysteriouscrossrefmetadataplusauthorizationtokentobeputhere

#-------------------- proxy --------------------
# proxy to be used for external calls to the CrossRef REST API or the Glutton service
# when they are not deployed behind the proxy ("null" when no proxy is used)
grobid.proxy_host=null
grobid.proxy_port=null
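# A minimal commented sketch, assuming a deployment behind a corporate proxy; the host
# "proxy.example.org" and port 3128 are hypothetical placeholders, not defaults:
#grobid.proxy_host=proxy.example.org
#grobid.proxy_port=3128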
#-------------------- models ------------------
grobid.crf.engine=wapiti
#grobid.crf.engine=delft
#grobid.crf.engine=crfpp
# wapiti must be used for the fulltext and segmentation models because they
# are not covered by DeLFT for the moment (the sequences are too long for the deep
# learning models). The header and reference_segmenter deep learning models might
# also be difficult to use without enough available memory (GPU or CPU).
grobid.crf.engine.segmentation=wapiti
grobid.crf.engine.fulltext=wapiti
# replace hyphens in model names by underscores here, e.g. name-citation -> name_citation
#grobid.crf.engine.reference_segmenter=wapiti
#grobid.crf.engine.name_header=wapiti
#grobid.crf.engine.header=wapiti
#grobid.crf.engine.date=wapiti
#grobid.crf.engine.figure=wapiti
#grobid.crf.engine.table=wapiti
#grobid.crf.engine.name_citation=wapiti
#grobid.crf.engine.affiliation_address=delft
#grobid.crf.engine.citation=delft

grobid.delft.useELMo=false
grobid.delft.install=../delft
grobid.delft.python.virtualEnv=
grobid.delft.architecture=BidLSTM_CRF_FEATURES
#grobid.delft.architecture=scibert

#-------------------- runtime ------------------
grobid.pdf.blocks.max=100000
grobid.pdf.tokens.max=1000000

#-------------------- training ------------------
# number of threads for training the wapiti models (0 to use all available processors)
grobid.nb_threads=0

#-------------------- language processing bazaar ------------------
# actual implementation of language recognition to be used
grobid.language_detector_factory=org.grobid.core.lang.impl.CybozuLanguageDetectorFactory

# actual implementation of sentence segmentation to be used
grobid.sentence_detector_factory=org.grobid.core.lang.impl.OpenNLPSentenceDetectorFactory
#grobid.sentence_detector_factory=org.grobid.core.lang.impl.PragmaticSentenceDetectorFactory

#-------------------- pooling -------------------
# Maximum number of parallel connections allowed
org.grobid.max.connections=10
# Maximum time to wait for a connection when the pool is full (in seconds)
org.grobid.pool.max.wait=1
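# A commented sketch of pool tuning, assuming a more heavily loaded multi-user
# deployment; the values 20 and 2 below are illustrative assumptions, not
# recommended defaults:
#org.grobid.max.connections=20
#org.grobid.pool.max.wait=2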