Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .claude/settings.local.json +14 -0
- .claude/skills/optimize-element-descriptions/SKILL.md +124 -0
- .github/workflows/ci.yml +28 -0
- .github/workflows/release.yml +81 -0
- .local/eval-baseline.log +41 -0
- .local/eval-batch10.log +41 -0
- .local/evaluate-llm.log +112 -0
- .local/gemini-batch10-full.log +43 -0
- .local/gemini-batch10-full.progress +6 -6
- .local/kisski-batch1.log +43 -0
- .local/kisski-batch1.progress +4 -12
- .local/kisski-batch10-t600.log +43 -0
- .local/kisski-batch10-t600.progress +6 -6
- .local/kisski-batch10.log +68 -0
- .local/kisski-batch10.progress +2 -5
- .local/kisski-batch162.log +11 -0
- .local/kisski-batch162.progress +2 -0
- .local/kisski-batch50.log +17 -0
- .local/kisski-batch50.progress +2 -0
- .pytest_cache/.gitignore +2 -0
- .pytest_cache/CACHEDIR.TAG +4 -0
- .pytest_cache/README.md +8 -0
- .pytest_cache/v/cache/lastfailed +1 -0
- .pytest_cache/v/cache/nodeids +162 -0
- .python-version +1 -0
- .releaserc.json +34 -0
- CHANGELOG.md +93 -0
- CLAUDE.md +149 -0
- README.md +1 -1
- package-lock.json +0 -0
- package.json +32 -0
- pyproject.toml +0 -2
- requirements.txt +7 -0
- schema/tei-bib.rng +0 -0
- tei_annotator/providers/README.md +1 -1
- webservice/nginx.conf +86 -0
.claude/settings.local.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"permissions": {
|
| 3 |
+
"allow": [
|
| 4 |
+
"Bash(xargs grep -l \"webservice\\\\|fastapi\\\\|flask\")",
|
| 5 |
+
"WebSearch",
|
| 6 |
+
"WebFetch(domain:router.huggingface.co)",
|
| 7 |
+
"Bash(/Users/cboulanger/.local/bin/hf auth:*)",
|
| 8 |
+
"Bash(uv sync:*)"
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
"enabledPlugins": {
|
| 12 |
+
"hf-cli@huggingface-skills": true
|
| 13 |
+
}
|
| 14 |
+
}
|
.claude/skills/optimize-element-descriptions/SKILL.md
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
name: optimize-element-descriptions
|
| 3 |
+
description: Iteratively improve TEIElement descriptions and schema rules to maximise F1 against the gold standard. Use when annotation quality is low or when evaluation shows missed or spurious spans.
|
| 4 |
+
disable-model-invocation: true
|
| 5 |
+
argument-hint: "--max-items N --provider gemini|kisski|all"
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
# optimize-element-descriptions
|
| 9 |
+
|
| 10 |
+
Iteratively improve the `TEIElement` descriptions and `TEISchema.rules` in the relevant schema file under `tei_annotator/schemas/` to maximise F1 score against the gold standard.
|
| 11 |
+
|
| 12 |
+
Schema files:
|
| 13 |
+
- `tei_annotator/schemas/bibl.py` — `build_bibl_schema()`
|
| 14 |
+
- `tei_annotator/schemas/bibl_reference_segmenter.py` — `build_bibl_reference_segmenter_schema()`
|
| 15 |
+
|
| 16 |
+
Before writing any descriptions, read the guidelines in [docs/tei-element-descriptions.md](../../../docs/tei-element-descriptions.md).
|
| 17 |
+
|
| 18 |
+
Extra arguments passed to this skill (e.g. `--max-items 10 --provider gemini`) are forwarded to `evaluate_llm.py` where applicable.
|
| 19 |
+
|
| 20 |
+
---
|
| 21 |
+
|
| 22 |
+
## Workflow
|
| 23 |
+
|
| 24 |
+
### Step 1 — Baseline evaluation
|
| 25 |
+
|
| 26 |
+
Run a full evaluation with `--verbose` and `--match-mode overlap` to capture missed and spurious spans for every failing record:
|
| 27 |
+
|
| 28 |
+
```bash
|
| 29 |
+
uv run scripts/evaluate_llm.py --verbose --match-mode overlap $ARGUMENTS
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
Record the overall Micro F1, per-element F1, and the text of the lowest-scoring records.
|
| 33 |
+
|
| 34 |
+
---
|
| 35 |
+
|
| 36 |
+
### Step 2 — Diagnose failure patterns
|
| 37 |
+
|
| 38 |
+
For each record where F1 < 1.0, analyse the `missed=` and `spurious=` lists alongside the Gold and Annotation lines shown by `--verbose`.
|
| 39 |
+
|
| 40 |
+
Group failures into patterns such as:
|
| 41 |
+
|
| 42 |
+
| Pattern | Typical cause |
|
| 43 |
+
|---|---|
|
| 44 |
+
| Span emitted as wrong element (spurious + missed same text) | Conflicting or missing negative constraint in description |
|
| 45 |
+
| Required parent span missing (e.g. `author` around `orgName`) | Parent–child relationship not described from both sides |
|
| 46 |
+
| Multiple instances merged into one span | No explicit "one span per …" instruction |
|
| 47 |
+
| Span boundary includes surrounding punctuation | Span boundary not specified in description |
|
| 48 |
+
| Positional trigger missed (e.g. editor after "in") | Contextual keyword triggers absent from description |
|
| 49 |
+
|
| 50 |
+
Focus on patterns that affect **multiple records or both models**: single-record anomalies may be gold-standard issues, not description issues.
|
| 51 |
+
|
| 52 |
+
---
|
| 53 |
+
|
| 54 |
+
### Step 3 — Improve descriptions
|
| 55 |
+
|
| 56 |
+
Read the relevant schema file under `tei_annotator/schemas/` to see the current descriptions, then edit the builder function following the guidelines in [docs/tei-element-descriptions.md](../../../docs/tei-element-descriptions.md).
|
| 57 |
+
|
| 58 |
+
Key principles (summary):
|
| 59 |
+
- Phrase everything as "emit a span", not "wrap in a tag"
|
| 60 |
+
- State multiplicity explicitly: "a separate span for each distinct …"
|
| 61 |
+
- Describe parent–child direction from both sides with a concrete example
|
| 62 |
+
- Add negative constraints: "never tag X as Y"
|
| 63 |
+
- Include textual triggers (keywords, position) and inline surface-form examples
|
| 64 |
+
- Prefix critical constraints with `CRITICAL:`
|
| 65 |
+
- If a failure pattern affects **multiple element types**, add the constraint to `TEISchema.rules` instead of duplicating it in each element description — the prompt renders `rules` as a numbered "General Rules" section before all element descriptions.
|
| 66 |
+
|
| 67 |
+
Only edit descriptions for elements where you identified a clear failure pattern.
|
| 68 |
+
|
| 69 |
+
---
|
| 70 |
+
|
| 71 |
+
### Step 4 — Targeted re-evaluation with `--grep`
|
| 72 |
+
|
| 73 |
+
Build a grep pattern from the text of the failing records identified in Step 1, then re-run only those records:
|
| 74 |
+
|
| 75 |
+
```bash
|
| 76 |
+
uv run scripts/evaluate_llm.py --verbose --match-mode overlap \
|
| 77 |
+
--grep "pattern1|pattern2|..." $ARGUMENTS
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
Compare the new F1 values against the Step 1 baseline for each affected record.
|
| 81 |
+
|
| 82 |
+
---
|
| 83 |
+
|
| 84 |
+
### Step 5 — Decide: iterate or stop
|
| 85 |
+
|
| 86 |
+
**Iterate (go to Step 2)** if:
|
| 87 |
+
- At least one record improved and no regressions were introduced, AND
|
| 88 |
+
- Remaining failures still show patterns addressable by description changes
|
| 89 |
+
|
| 90 |
+
**Stop** if any of the following apply:
|
| 91 |
+
- No improvement across two consecutive rounds
|
| 92 |
+
- Remaining failures appear to be gold-standard annotation issues (flag these for human review; see Step 5a)
|
| 93 |
+
- Failures are caused by model-level reasoning limits that description changes cannot fix (e.g. a model consistently ignoring a rule that is already clearly stated)
|
| 94 |
+
|
| 95 |
+
---
|
| 96 |
+
|
| 97 |
+
### Step 5a — Handle editorial ambiguities with `cert="low"`
|
| 98 |
+
|
| 99 |
+
If a failure pattern **persists across model families** after two or more rule iterations and the boundary in question reflects a genuine editorial choice (either split or merged would be defensible), do **not** continue iterating on the prompt. Instead, update the gold file:
|
| 100 |
+
|
| 101 |
+
1. Split the merged gold span into two adjacent spans with **no tail text** between them.
|
| 102 |
+
2. Set `cert="low"` on the **second** span.
|
| 103 |
+
|
| 104 |
+
```xml
|
| 105 |
+
<!-- before -->
|
| 106 |
+
<bibl><label>5</label> Commentary mentioning Althusser; see Bunn (2015).<lb/> </bibl>
|
| 107 |
+
|
| 108 |
+
<!-- after -->
|
| 109 |
+
<bibl><label>5</label> Commentary mentioning Althusser;</bibl><bibl cert="low">see Bunn (2015).<lb/> </bibl>
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
The evaluator's union-match pass then accepts either model behaviour (split or merged) as correct. See [tei_annotator/evaluation/README.md](../../../tei_annotator/evaluation/README.md#uncertain-boundary-gold-spans-certlow) for the full specification.
|
| 113 |
+
|
| 114 |
+
---
|
| 115 |
+
|
| 116 |
+
### Step 6 — Full re-evaluation (final)
|
| 117 |
+
|
| 118 |
+
Once iterations are complete, run a full evaluation without `--grep` to confirm that overall F1 has not regressed on records that were previously correct:
|
| 119 |
+
|
| 120 |
+
```bash
|
| 121 |
+
uv run scripts/evaluate_llm.py --verbose --match-mode overlap $ARGUMENTS
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
Report the final Micro F1 and per-element breakdown, noting which elements improved and which remain problematic.
|
.github/workflows/ci.yml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: CI
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main, develop]
|
| 6 |
+
pull_request:
|
| 7 |
+
branches: [main, develop]
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
test:
|
| 11 |
+
name: Run Tests
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
|
| 14 |
+
steps:
|
| 15 |
+
- name: Checkout code
|
| 16 |
+
uses: actions/checkout@v4
|
| 17 |
+
|
| 18 |
+
- name: Set up uv
|
| 19 |
+
uses: astral-sh/setup-uv@v5
|
| 20 |
+
with:
|
| 21 |
+
python-version: "3.12"
|
| 22 |
+
enable-cache: true
|
| 23 |
+
|
| 24 |
+
- name: Install dependencies
|
| 25 |
+
run: uv sync
|
| 26 |
+
|
| 27 |
+
- name: Run tests
|
| 28 |
+
run: uv run pytest
|
.github/workflows/release.yml
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Release
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
workflow_run:
|
| 5 |
+
workflows: ["CI"]
|
| 6 |
+
types:
|
| 7 |
+
- completed
|
| 8 |
+
branches:
|
| 9 |
+
- main
|
| 10 |
+
|
| 11 |
+
permissions:
|
| 12 |
+
contents: write
|
| 13 |
+
issues: write
|
| 14 |
+
pull-requests: write
|
| 15 |
+
|
| 16 |
+
jobs:
|
| 17 |
+
release:
|
| 18 |
+
name: Semantic Release
|
| 19 |
+
runs-on: ubuntu-latest
|
| 20 |
+
if: ${{ github.event.workflow_run.conclusion == 'success' }}
|
| 21 |
+
|
| 22 |
+
steps:
|
| 23 |
+
- name: Checkout code
|
| 24 |
+
uses: actions/checkout@v4
|
| 25 |
+
with:
|
| 26 |
+
fetch-depth: 0
|
| 27 |
+
persist-credentials: false
|
| 28 |
+
|
| 29 |
+
- name: Set up Node.js
|
| 30 |
+
uses: actions/setup-node@v4
|
| 31 |
+
with:
|
| 32 |
+
node-version: "22"
|
| 33 |
+
|
| 34 |
+
- name: Set up Python
|
| 35 |
+
uses: actions/setup-python@v5
|
| 36 |
+
with:
|
| 37 |
+
python-version: "3.12"
|
| 38 |
+
|
| 39 |
+
- name: Install uv
|
| 40 |
+
uses: astral-sh/setup-uv@v5
|
| 41 |
+
|
| 42 |
+
- name: Install Node dependencies
|
| 43 |
+
run: npm ci
|
| 44 |
+
|
| 45 |
+
- name: Run semantic-release
|
| 46 |
+
env:
|
| 47 |
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
| 48 |
+
GIT_AUTHOR_NAME: ${{ github.actor }}
|
| 49 |
+
GIT_AUTHOR_EMAIL: ${{ github.actor }}@users.noreply.github.com
|
| 50 |
+
GIT_COMMITTER_NAME: ${{ github.actor }}
|
| 51 |
+
GIT_COMMITTER_EMAIL: ${{ github.actor }}@users.noreply.github.com
|
| 52 |
+
run: npx semantic-release
|
| 53 |
+
|
| 54 |
+
update-tags:
|
| 55 |
+
name: Update Dynamic Tags
|
| 56 |
+
runs-on: ubuntu-latest
|
| 57 |
+
needs: release
|
| 58 |
+
if: success()
|
| 59 |
+
|
| 60 |
+
steps:
|
| 61 |
+
- name: Checkout code
|
| 62 |
+
uses: actions/checkout@v4
|
| 63 |
+
with:
|
| 64 |
+
fetch-depth: 0
|
| 65 |
+
|
| 66 |
+
- name: Update latest and stable tags
|
| 67 |
+
run: |
|
| 68 |
+
git config user.name "GitHub Actions"
|
| 69 |
+
git config user.email "actions@github.com"
|
| 70 |
+
|
| 71 |
+
LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
|
| 72 |
+
|
| 73 |
+
if [[ -n "$LATEST_TAG" && ! "$LATEST_TAG" =~ - ]]; then
|
| 74 |
+
echo "Updating latest and stable tags to $LATEST_TAG"
|
| 75 |
+
git tag -f latest
|
| 76 |
+
git tag -f stable
|
| 77 |
+
git push origin latest --force
|
| 78 |
+
git push origin stable --force
|
| 79 |
+
else
|
| 80 |
+
echo "No version tags found or latest tag is a pre-release. Skipping tag update."
|
| 81 |
+
fi
|
.local/eval-baseline.log
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
Provider : Gemini 2.0 Flash
|
| 4 |
+
Gold file : tests/fixtures/blbl-examples.tei.xml
|
| 5 |
+
Records : 30 match-mode: text
|
| 6 |
+
Batch size: 1
|
| 7 |
+
GLiNER : disabled
|
| 8 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
|
| 10 |
+
Completed: 30/30 records
|
| 11 |
+
|
| 12 |
+
=== Overall — Gemini 2.0 Flash ===
|
| 13 |
+
Micro P=0.807 R=0.843 F1=0.825 (TP=247 FP=59 FN=46)
|
| 14 |
+
Macro P=0.757 R=0.767 F1=0.754
|
| 15 |
+
|
| 16 |
+
Per-element breakdown:
|
| 17 |
+
author P=0.488 R=0.700 F1=0.575 (TP=21 FP=22 FN=9)
|
| 18 |
+
biblScope P=0.944 R=0.895 F1=0.919 (TP=34 FP=2 FN=4)
|
| 19 |
+
date P=0.909 R=0.882 F1=0.896 (TP=30 FP=3 FN=4)
|
| 20 |
+
editor P=0.800 R=0.800 F1=0.800 (TP=4 FP=1 FN=1)
|
| 21 |
+
forename P=0.902 R=0.920 F1=0.911 (TP=46 FP=5 FN=4)
|
| 22 |
+
idno P=1.000 R=1.000 F1=1.000 (TP=1 FP=0 FN=0)
|
| 23 |
+
label P=1.000 R=0.667 F1=0.800 (TP=2 FP=0 FN=1)
|
| 24 |
+
note P=0.200 R=0.333 F1=0.250 (TP=1 FP=4 FN=2)
|
| 25 |
+
orgName P=0.200 R=0.333 F1=0.250 (TP=1 FP=4 FN=2)
|
| 26 |
+
pubPlace P=0.765 R=0.929 F1=0.839 (TP=13 FP=4 FN=1)
|
| 27 |
+
publisher P=0.909 R=0.833 F1=0.870 (TP=10 FP=1 FN=2)
|
| 28 |
+
surname P=1.000 R=1.000 F1=1.000 (TP=51 FP=0 FN=0)
|
| 29 |
+
title P=0.717 R=0.673 F1=0.695 (TP=33 FP=13 FN=16)
|
| 30 |
+
|
| 31 |
+
Lowest-F1 records (top 5):
|
| 32 |
+
# 2 F1=0.615 missed=['orgName', 'orgName', 'title'] spurious=['orgName', 'note']
|
| 33 |
+
"Commission Inter-IREM Collège & Commission Inter-IREM S..."
|
| 34 |
+
# 29 F1=0.615 missed=['forename', 'author'] spurious=['forename', 'forename', 'author']
|
| 35 |
+
"Cohen, Gary B. Education and Middle Class Society in Im..."
|
| 36 |
+
# 3 F1=0.625 missed=['date', 'title', 'biblScope'] spurious=['date', 'title', 'biblScope']
|
| 37 |
+
"BARIL, Jean (2013). Droit dβaccΓ¨s Γ lβinformation envir..."
|
| 38 |
+
# 5 F1=0.625 missed=['forename', 'author', 'title'] spurious=['forename', 'author', 'title']
|
| 39 |
+
"Doyle JJ. 1998. Phylogenetic perspectives on nodulation..."
|
| 40 |
+
# 19 F1=0.640 missed=['author', 'title', 'title', 'forename', 'editor'] spurious=['author', 'title', 'forename', 'editor']
|
| 41 |
+
"Taitt, David. 1916. "Journal of David Taitt's Travels f..."
|
.local/eval-batch10.log
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
Provider : Gemini 2.0 Flash
|
| 4 |
+
Gold file : tests/fixtures/blbl-examples.tei.xml
|
| 5 |
+
Records : 30 match-mode: text
|
| 6 |
+
Batch size: 10
|
| 7 |
+
GLiNER : disabled
|
| 8 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
|
| 10 |
+
Completed: 30/30 records
|
| 11 |
+
|
| 12 |
+
=== Overall — Gemini 2.0 Flash ===
|
| 13 |
+
Micro P=0.825 R=0.870 F1=0.847 (TP=255 FP=54 FN=38)
|
| 14 |
+
Macro P=0.765 R=0.808 F1=0.779
|
| 15 |
+
|
| 16 |
+
Per-element breakdown:
|
| 17 |
+
author P=0.676 R=0.767 F1=0.719 (TP=23 FP=11 FN=7)
|
| 18 |
+
biblScope P=0.971 R=0.895 F1=0.932 (TP=34 FP=1 FN=4)
|
| 19 |
+
date P=1.000 R=1.000 F1=1.000 (TP=34 FP=0 FN=0)
|
| 20 |
+
editor P=0.800 R=0.800 F1=0.800 (TP=4 FP=1 FN=1)
|
| 21 |
+
forename P=0.833 R=0.900 F1=0.865 (TP=45 FP=9 FN=5)
|
| 22 |
+
idno P=1.000 R=1.000 F1=1.000 (TP=1 FP=0 FN=0)
|
| 23 |
+
label P=1.000 R=1.000 F1=1.000 (TP=3 FP=0 FN=0)
|
| 24 |
+
note P=0.200 R=0.333 F1=0.250 (TP=1 FP=4 FN=2)
|
| 25 |
+
orgName P=0.100 R=0.333 F1=0.154 (TP=1 FP=9 FN=2)
|
| 26 |
+
pubPlace P=0.722 R=0.929 F1=0.813 (TP=13 FP=5 FN=1)
|
| 27 |
+
publisher P=0.909 R=0.833 F1=0.870 (TP=10 FP=1 FN=2)
|
| 28 |
+
surname P=0.980 R=0.980 F1=0.980 (TP=50 FP=1 FN=1)
|
| 29 |
+
title P=0.750 R=0.735 F1=0.742 (TP=36 FP=12 FN=13)
|
| 30 |
+
|
| 31 |
+
Lowest-F1 records (top 5):
|
| 32 |
+
# 29 F1=0.571 missed=['forename', 'author'] spurious=['forename', 'forename', 'forename', 'author']
|
| 33 |
+
"Cohen, Gary B. Education and Middle Class Society in Im..."
|
| 34 |
+
# 2 F1=0.615 missed=['orgName', 'orgName', 'title'] spurious=['orgName', 'note']
|
| 35 |
+
"Commission Inter-IREM Collège & Commission Inter-IREM S..."
|
| 36 |
+
# 5 F1=0.625 missed=['forename', 'author', 'title'] spurious=['forename', 'author', 'title']
|
| 37 |
+
"Doyle JJ. 1998. Phylogenetic perspectives on nodulation..."
|
| 38 |
+
# 17 F1=0.667 missed=['publisher'] spurious=['orgName', 'orgName', 'publisher', 'orgName', 'pubPlace']
|
| 39 |
+
"McGrath, P. 2005 Toronto in the 1850s: A Transcription ..."
|
| 40 |
+
# 28 F1=0.667 missed=['title', 'publisher'] spurious=['note', 'note', 'orgName', 'orgName', 'orgName']
|
| 41 |
+
"Oxenford, J.L. & Williams, S.I., 2009. Failure and Root..."
|
.local/evaluate-llm.log
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
Provider : Gemini 2.0 Flash
|
| 4 |
+
Gold file : tests/fixtures/blbl-examples.tei.xml
|
| 5 |
+
Records : 10 match-mode: overlap
|
| 6 |
+
GLiNER : disabled
|
| 7 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 8 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
Gold: <author><orgName>Commission Inter-IREM Collège</orgName></author> & <author><orgName>Commission Inter-IREM Statistiques et Probabilités</orgName></author>, (<date>2012</date>). <title level="a">Probabilités au collège : ne pas laisser l’enseignement des probabilités au hasard…</title>. Dans <title level="j">Brochure APMEP</title> n°<biblScope unit="volume">198</biblScope>.
|
| 10 |
+
Annotation: <orgName><author>Commission Inter-IREM Collège & Commission Inter-IREM Statistiques et Probabilités</author></orgName>, <date>(2012)</date>. <title>Probabilités au collège : ne pas laisser l’enseignement des probabilités au hasard….</title> Dans <title level="s">Brochure APMEP</title> <biblScope>n°198</biblScope>.
|
| 11 |
+
F1=0.857 missed=['orgName', 'author'] spurious=[]
|
| 12 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 13 |
+
Gold: <author><surname>Russell</surname>, <forename>D.A.</forename> and <forename>Michael</forename> <surname>Winterbottom</surname></author> <date>1989</date> [<date>1972</date>]. <title level="m">Classical Literary Criticism. Oxford World Classics</title>. <pubPlace>Oxford</pubPlace>: <publisher>Oxford UP</publisher>.
|
| 14 |
+
Annotation: <author><surname>Russell</surname>, <forename>D.A.</forename></author> and <author><forename>Michael</forename> <surname>Winterbottom</surname></author> <date>1989</date> <date>[1972]</date>. <title level="m">Classical Literary Criticism</title>. <title level="s"><pubPlace>Oxford</pubPlace> World Classics</title>. Oxford: <publisher><orgName>Oxford UP</orgName></publisher>.
|
| 15 |
+
F1=0.783 missed=['pubPlace'] spurious=['author', 'pubPlace', 'title', 'orgName']
|
| 16 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 17 |
+
Gold: <label>17.</label><author><surname>Creed</surname> <forename>PA</forename>, <surname>Hicks</surname> <forename>RE</forename>, <surname>Machin</surname> <forename>MA</forename></author>. <title level="a">Behavioural plasticity and mental health outcomes for long-term unemployed attending occupational training programmes</title>. <title level="j">J Occup Org Psychol</title>. <date>1998</date>;<biblScope unit="volume">71</biblScope>: <biblScope unit="page">171-91</biblScope>.
|
| 18 |
+
Annotation: <label>17.</label><author><surname>Creed</surname> <forename>PA</forename></author>, <author><surname>Hicks</surname> <forename>RE</forename></author>, <author><surname>Machin</surname> <forename>MA</forename></author>. <title level="a">Behavioural plasticity and mental health outcomes for long-term unemployed attending occupational training programmes</title>. <title level="j">J Occup Org Psychol</title>. <date>1998</date>;<biblScope unit="volume">71</biblScope>: <biblScope unit="page">171-91</biblScope>.
|
| 19 |
+
F1=0.857 missed=['author'] spurious=['author', 'author', 'author']
|
| 20 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 21 |
+
Gold: <label>25.</label> <author><surname>Spickett-Jones</surname>, <forename>J. G.</forename> & <forename>T.-Y.</forename> <surname>Eng</surname></author> (<date>2006</date>). β<title level="a">SMEs and the Strategic Context for Communication</title>ββ, <title level="j">Journal of Marketing Communications</title>, Vol. <biblScope unit="volume">12</biblScope>(<biblScope unit="issue">3</biblScope>), <biblScope unit="page">225 - 243</biblScope>.
|
| 22 |
+
Annotation: <label>25.</label> <author><surname>Spickett-Jones</surname>, <forename>J. G.</forename></author> & <author><forename>T.-Y.</forename> <surname>Eng</surname></author> <date>(2006)</date>. <title level="a">βSMEs and the Strategic Context for Communicationββ</title>, <title level="j">Journal of Marketing Communications</title>, Vol. <biblScope unit="volume">12</biblScope>(<biblScope unit="issue">3</biblScope>), <biblScope unit="page">225 - 243</biblScope>.
|
| 23 |
+
F1=0.960 missed=[] spurious=['author']
|
| 24 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 25 |
+
Gold: <author><surname>Lillié</surname>, <forename>F.</forename></author>, <title level="m">Analyse tectonique de Gisement Claude</title> (<pubPlace>Cluff Lake, Saskatchewan</pubPlace>). <note type="report">Amok Internal Report</note>. <date>1982</date>.
|
| 26 |
+
Annotation: <author><surname>Lillié</surname>, <forename>F.</forename></author>, <title>Analyse tectonique de Gisement Claude (Cluff Lake, Saskatchewan)</title>. <note type="report">Amok Internal Report</note>. <date>1982</date>.
|
| 27 |
+
F1=0.923 missed=['pubPlace'] spurious=[]
|
| 28 |
+
|
| 29 |
+
Completed: 10/10 records
|
| 30 |
+
|
| 31 |
+
=== Overall — Gemini 2.0 Flash ===
|
| 32 |
+
Micro P=0.914 R=0.944 F1=0.929 (TP=85 FP=8 FN=5)
|
| 33 |
+
Macro P=0.882 R=0.888 F1=0.882
|
| 34 |
+
|
| 35 |
+
Per-element breakdown:
|
| 36 |
+
author P=0.643 R=0.818 F1=0.720 (TP=9 FP=5 FN=2)
|
| 37 |
+
biblScope P=1.000 R=1.000 F1=1.000 (TP=14 FP=0 FN=0)
|
| 38 |
+
date P=1.000 R=1.000 F1=1.000 (TP=11 FP=0 FN=0)
|
| 39 |
+
editor P=1.000 R=1.000 F1=1.000 (TP=1 FP=0 FN=0)
|
| 40 |
+
forename P=1.000 R=1.000 F1=1.000 (TP=13 FP=0 FN=0)
|
| 41 |
+
label P=1.000 R=1.000 F1=1.000 (TP=2 FP=0 FN=0)
|
| 42 |
+
note P=1.000 R=1.000 F1=1.000 (TP=1 FP=0 FN=0)
|
| 43 |
+
orgName P=0.500 R=0.500 F1=0.500 (TP=1 FP=1 FN=1)
|
| 44 |
+
pubPlace P=0.500 R=0.333 F1=0.400 (TP=1 FP=1 FN=2)
|
| 45 |
+
publisher P=1.000 R=1.000 F1=1.000 (TP=2 FP=0 FN=0)
|
| 46 |
+
surname P=1.000 R=1.000 F1=1.000 (TP=14 FP=0 FN=0)
|
| 47 |
+
title P=0.941 R=1.000 F1=0.970 (TP=16 FP=1 FN=0)
|
| 48 |
+
|
| 49 |
+
Lowest-F1 records (top 5):
|
| 50 |
+
# 4 F1=0.783 missed=['pubPlace'] spurious=['author', 'pubPlace', 'title', 'orgName']
|
| 51 |
+
"Russell, D.A. and Michael Winterbottom 1989 [1972]. Cla..."
|
| 52 |
+
# 2 F1=0.857 missed=['orgName', 'author'] spurious=[]
|
| 53 |
+
"Commission Inter-IREM Collège & Commission Inter-IREM S..."
|
| 54 |
+
# 7 F1=0.857 missed=['author'] spurious=['author', 'author', 'author']
|
| 55 |
+
"17.Creed PA, Hicks RE, Machin MA. Behavioural plasticit..."
|
| 56 |
+
# 9 F1=0.923 missed=['pubPlace'] spurious=[]
|
| 57 |
+
"LilliΓ©, F., Analyse tectonique de Gisement Claude (Cluf..."
|
| 58 |
+
# 8 F1=0.960 missed=[] spurious=['author']
|
| 59 |
+
"25. Spickett-Jones, J. G. & T.-Y. Eng (2006). βSMEs and..."
|
| 60 |
+
|
| 61 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 62 |
+
Provider : KISSKI / llama-3.3-70b-instruct
|
| 63 |
+
Gold file : tests/fixtures/blbl-examples.tei.xml
|
| 64 |
+
Records : 10 match-mode: overlap
|
| 65 |
+
GLiNER : disabled
|
| 66 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 67 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 68 |
+
Gold: <author><orgName>Commission Inter-IREM Collège</orgName></author> & <author><orgName>Commission Inter-IREM Statistiques et Probabilités</orgName></author>, (<date>2012</date>). <title level="a">Probabilités au collège : ne pas laisser l’enseignement des probabilités au hasard…</title>. Dans <title level="j">Brochure APMEP</title> n°<biblScope unit="volume">198</biblScope>.
|
| 69 |
+
Annotation: <author><orgName>Commission Inter-IREM Collège</orgName> & <orgName>Commission Inter-IREM Statistiques et Probabilités</orgName></author>, (<date>2012</date>). <title level="a">Probabilités au collège : ne pas laisser l’enseignement des probabilités au hasard….</title> Dans <title level="m">Brochure APMEP n°198</title>.
|
| 70 |
+
F1=0.857 missed=['author', 'biblScope'] spurious=[]
|
| 71 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 72 |
+
Gold: <author><surname>Russell</surname>, <forename>D.A.</forename> and <forename>Michael</forename> <surname>Winterbottom</surname></author> <date>1989</date> [<date>1972</date>]. <title level="m">Classical Literary Criticism. Oxford World Classics</title>. <pubPlace>Oxford</pubPlace>: <publisher>Oxford UP</publisher>.
|
| 73 |
+
Annotation: <author><surname>Russell</surname>, <forename>D.A.</forename> and <forename>Michael</forename> <surname>Winterbottom</surname></author> <date>1989 [1972]</date>. <title level="m">Classical Literary Criticism</title>. <title level="s">Oxford World Classics</title>. <pubPlace>Oxford</pubPlace>: <publisher>Oxford UP</publisher>.
|
| 74 |
+
F1=0.800 missed=['date', 'date'] spurious=['date', 'title']
|
| 75 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 76 |
+
Gold: <label>17.</label><author><surname>Creed</surname> <forename>PA</forename>, <surname>Hicks</surname> <forename>RE</forename>, <surname>Machin</surname> <forename>MA</forename></author>. <title level="a">Behavioural plasticity and mental health outcomes for long-term unemployed attending occupational training programmes</title>. <title level="j">J Occup Org Psychol</title>. <date>1998</date>;<biblScope unit="volume">71</biblScope>: <biblScope unit="page">171-91</biblScope>.
|
| 77 |
+
Annotation: <label>17</label>.<author><surname>Creed</surname> <forename>PA</forename></author>, <author><surname>Hicks</surname> <forename>RE</forename></author>, <author><surname>Machin</surname> <forename>MA</forename></author>. <title level="a">Behavioural plasticity and mental health outcomes for long-term unemployed attending occupational training programmes</title>. <title level="j">J Occup Org Psychol</title>. <date>1998</date>;<biblScope unit="volume">71</biblScope>: <biblScope unit="page">171-91</biblScope>.
|
| 78 |
+
F1=0.857 missed=['author'] spurious=['author', 'author', 'author']
|
| 79 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 80 |
+
Gold: <label>25.</label> <author><surname>Spickett-Jones</surname>, <forename>J. G.</forename> & <forename>T.-Y.</forename> <surname>Eng</surname></author> (<date>2006</date>). β<title level="a">SMEs and the Strategic Context for Communication</title>ββ, <title level="j">Journal of Marketing Communications</title>, Vol. <biblScope unit="volume">12</biblScope>(<biblScope unit="issue">3</biblScope>), <biblScope unit="page">225 - 243</biblScope>.
|
| 81 |
+
Annotation: <label>25</label>. <author><surname>Spickett-Jones</surname>, <forename>J. G.</forename> & <forename>T.-Y.</forename> <surname>Eng</surname></author> (<date>2006</date>). βSMEs and the Strategic Context for Communicationββ, <title level="j">Journal of Marketing Communications</title>, <biblScope unit="volume">Vol. 12(3)</biblScope>, <biblScope unit="page">225 - 243</biblScope>.
|
| 82 |
+
F1=0.818 missed=['title', 'biblScope', 'biblScope'] spurious=['biblScope']
|
| 83 |
+
|
| 84 |
+
Completed: 10/10 records
|
| 85 |
+
|
| 86 |
+
=== Overall — KISSKI / llama-3.3-70b-instruct ===
|
| 87 |
+
Micro P=0.932 R=0.911 F1=0.921 (TP=82 FP=6 FN=8)
|
| 88 |
+
Macro P=0.959 R=0.947 F1=0.952
|
| 89 |
+
|
| 90 |
+
Per-element breakdown:
|
| 91 |
+
author P=0.750 R=0.818 F1=0.783 (TP=9 FP=3 FN=2)
|
| 92 |
+
biblScope P=0.917 R=0.786 F1=0.846 (TP=11 FP=1 FN=3)
|
| 93 |
+
date P=0.900 R=0.818 F1=0.857 (TP=9 FP=1 FN=2)
|
| 94 |
+
editor P=1.000 R=1.000 F1=1.000 (TP=1 FP=0 FN=0)
|
| 95 |
+
forename P=1.000 R=1.000 F1=1.000 (TP=13 FP=0 FN=0)
|
| 96 |
+
label P=1.000 R=1.000 F1=1.000 (TP=2 FP=0 FN=0)
|
| 97 |
+
note P=1.000 R=1.000 F1=1.000 (TP=1 FP=0 FN=0)
|
| 98 |
+
orgName P=1.000 R=1.000 F1=1.000 (TP=2 FP=0 FN=0)
|
| 99 |
+
pubPlace P=1.000 R=1.000 F1=1.000 (TP=3 FP=0 FN=0)
|
| 100 |
+
publisher P=1.000 R=1.000 F1=1.000 (TP=2 FP=0 FN=0)
|
| 101 |
+
surname P=1.000 R=1.000 F1=1.000 (TP=14 FP=0 FN=0)
|
| 102 |
+
title P=0.938 R=0.938 F1=0.938 (TP=15 FP=1 FN=1)
|
| 103 |
+
|
| 104 |
+
Lowest-F1 records (top 5):
|
| 105 |
+
# 4 F1=0.800 missed=['date', 'date'] spurious=['date', 'title']
|
| 106 |
+
"Russell, D.A. and Michael Winterbottom 1989 [1972]. Cla..."
|
| 107 |
+
# 8 F1=0.818 missed=['title', 'biblScope', 'biblScope'] spurious=['biblScope']
|
| 108 |
+
"25. Spickett-Jones, J. G. & T.-Y. Eng (2006). βSMEs and..."
|
| 109 |
+
# 2 F1=0.857 missed=['author', 'biblScope'] spurious=[]
|
| 110 |
+
"Commission Inter-IREM Collège & Commission Inter-IREM S..."
|
| 111 |
+
# 7 F1=0.857 missed=['author'] spurious=['author', 'author', 'author']
|
| 112 |
+
"17.Creed PA, Hicks RE, Machin MA. Behavioural plasticit..."
|
.local/gemini-batch10-full.log
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
Provider : Gemini 2.0 Flash
|
| 4 |
+
Gold file : tests/fixtures/blbl-examples.tei.xml
|
| 5 |
+
Records : 162 match-mode: text
|
| 6 |
+
Batch size: 10
|
| 7 |
+
GLiNER : disabled
|
| 8 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
|
| 10 |
+
Completed: 162/162 records
|
| 11 |
+
|
| 12 |
+
=== Overall — Gemini 2.0 Flash ===
|
| 13 |
+
Micro P=0.799 R=0.696 F1=0.744 (TP=1126 FP=283 FN=492)
|
| 14 |
+
Macro P=0.704 R=0.589 F1=0.627
|
| 15 |
+
|
| 16 |
+
Per-element breakdown:
|
| 17 |
+
author P=0.522 R=0.623 F1=0.568 (TP=96 FP=88 FN=58)
|
| 18 |
+
biblScope P=0.902 R=0.653 F1=0.758 (TP=111 FP=12 FN=59)
|
| 19 |
+
date P=0.891 R=0.780 F1=0.832 (TP=131 FP=16 FN=37)
|
| 20 |
+
editor P=0.417 R=0.484 F1=0.448 (TP=15 FP=21 FN=16)
|
| 21 |
+
forename P=0.900 R=0.754 F1=0.820 (TP=242 FP=27 FN=79)
|
| 22 |
+
idno P=1.000 R=0.500 F1=0.667 (TP=1 FP=0 FN=1)
|
| 23 |
+
label P=1.000 R=0.727 F1=0.842 (TP=8 FP=0 FN=3)
|
| 24 |
+
note P=0.412 R=0.318 F1=0.359 (TP=7 FP=10 FN=15)
|
| 25 |
+
orgName P=0.207 R=0.545 F1=0.300 (TP=6 FP=23 FN=5)
|
| 26 |
+
page P=0.000 R=0.000 F1=0.000 (TP=0 FP=0 FN=1)
|
| 27 |
+
ptr P=1.000 R=0.600 F1=0.750 (TP=3 FP=0 FN=2)
|
| 28 |
+
pubPlace P=0.783 R=0.806 F1=0.794 (TP=54 FP=15 FN=13)
|
| 29 |
+
publisher P=0.818 R=0.692 F1=0.750 (TP=45 FP=10 FN=20)
|
| 30 |
+
surname P=0.977 R=0.796 F1=0.878 (TP=258 FP=6 FN=66)
|
| 31 |
+
title P=0.730 R=0.560 F1=0.634 (TP=149 FP=55 FN=117)
|
| 32 |
+
|
| 33 |
+
Lowest-F1 records (top 5):
|
| 34 |
+
#141 F1=0.000 missed=['surname', 'forename', 'surname', 'forename', 'surname', 'forename', 'surname', 'forename', 'author', 'title', 'pubPlace', 'surname', 'forename', 'surname', 'forename', 'editor', 'title', 'publisher', 'pubPlace', 'biblScope', 'date'] spurious=[]
|
| 35 |
+
"Engelhardt, W. v., Hörz, F., Stöffler, D. and Bertsch, ..."
|
| 36 |
+
#142 F1=0.000 missed=['label', 'surname', 'forename', 'surname', 'forename', 'surname', 'forename', 'surname', 'forename', 'surname', 'forename', 'surname', 'forename', 'author', 'title', 'title', 'date', 'biblScope', 'biblScope'] spurious=[]
|
| 37 |
+
"5-Decq P, Bokombe D, Nguyen Jp, Djindjian M, Molina P, ..."
|
| 38 |
+
#143 F1=0.000 missed=['surname', 'forename', 'author', 'title', 'title', 'biblScope', 'biblScope', 'date'] spurious=[]
|
| 39 |
+
"Hildebrand, A.R., et al., Mapping Chicxulub crater stru..."
|
| 40 |
+
#144 F1=0.000 missed=['surname', 'forename', 'surname', 'forename', 'surname', 'forename', 'author', 'title', 'title', 'date', 'note'] spurious=[]
|
| 41 |
+
"Grande BM, Albuquerque M, Morin RD, “Towards a Cloud-re..."
|
| 42 |
+
#145 F1=0.000 missed=['surname', 'forename', 'author', 'title', 'publisher', 'title', 'pubPlace', 'date'] spurious=[]
|
| 43 |
+
"GREEN, Christopher, Art in France: 1900-1940, Yale Univ..."
|
.local/gemini-batch10-full.progress
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
| 0 |
-
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
, Β« Enjeux socio-culturels des discours amou: 74%|ββββββββ | 120/162 [08:45<02:55, 4.18s/rec, F1=0.857]
|
| 5 |
-
, Β« Enjeux socio-culturels des discours amou: 74%|ββββββββ | 120/162 [08:45<02:55, 4.18s/rec, F1=0.286]
|
|
|
|
| 1 |
+
warning: `VIRTUAL_ENV=/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9` does not match the project environment path `.venv` and will be ignored; use `--active` to target the active environment instead
|
| 2 |
+
|
| 3 |
+
batch_results = _evaluate_batch(
|
| 4 |
+
|
| 5 |
+
batch_results = _evaluate_batch(
|
| 6 |
+
|
|
|
|
|
|
.local/kisski-batch1.log
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
Provider : KISSKI / llama-3.3-70b-instruct
|
| 4 |
+
Gold file : tests/fixtures/blbl-examples.tei.xml
|
| 5 |
+
Records : 162 match-mode: text
|
| 6 |
+
Batch size: 1
|
| 7 |
+
GLiNER : disabled
|
| 8 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
|
| 10 |
+
Completed: 162/162 records
|
| 11 |
+
|
| 12 |
+
=== Overall — KISSKI / llama-3.3-70b-instruct ===
|
| 13 |
+
Micro P=0.876 R=0.858 F1=0.867 (TP=1388 FP=196 FN=230)
|
| 14 |
+
Macro P=0.663 R=0.634 F1=0.636
|
| 15 |
+
|
| 16 |
+
Per-element breakdown:
|
| 17 |
+
author P=0.731 R=0.831 F1=0.778 (TP=128 FP=47 FN=26)
|
| 18 |
+
biblScope P=0.865 R=0.829 F1=0.847 (TP=141 FP=22 FN=29)
|
| 19 |
+
date P=0.958 R=0.940 F1=0.949 (TP=158 FP=7 FN=10)
|
| 20 |
+
editor P=0.852 R=0.742 F1=0.793 (TP=23 FP=4 FN=8)
|
| 21 |
+
forename P=0.968 R=0.941 F1=0.954 (TP=302 FP=10 FN=19)
|
| 22 |
+
idno P=0.000 R=0.000 F1=0.000 (TP=0 FP=3 FN=2)
|
| 23 |
+
label P=0.333 R=0.182 F1=0.235 (TP=2 FP=4 FN=9)
|
| 24 |
+
note P=0.727 R=0.364 F1=0.485 (TP=8 FP=3 FN=14)
|
| 25 |
+
orgName P=0.296 R=0.727 F1=0.421 (TP=8 FP=19 FN=3)
|
| 26 |
+
page P=0.000 R=0.000 F1=0.000 (TP=0 FP=0 FN=1)
|
| 27 |
+
ptr P=0.750 R=0.600 F1=0.667 (TP=3 FP=1 FN=2)
|
| 28 |
+
pubPlace P=0.841 R=0.866 F1=0.853 (TP=58 FP=11 FN=9)
|
| 29 |
+
publisher P=0.852 R=0.800 F1=0.825 (TP=52 FP=9 FN=13)
|
| 30 |
+
surname P=0.988 R=0.978 F1=0.983 (TP=317 FP=4 FN=7)
|
| 31 |
+
title P=0.783 R=0.707 F1=0.743 (TP=188 FP=52 FN=78)
|
| 32 |
+
|
| 33 |
+
Lowest-F1 records (top 5):
|
| 34 |
+
# 11 F1=0.000 missed=['surname', 'forename', 'author', 'date', 'title', 'forename', 'surname', 'editor', 'title', 'pubPlace', 'publisher', 'biblScope'] spurious=[]
|
| 35 |
+
"Jakobson, Roman 1960. "Closing Statement: Linguistics a..."
|
| 36 |
+
# 97 F1=0.182 missed=['surname', 'forename', 'author', 'title', 'ptr'] spurious=['forename', 'surname', 'author', 'title']
|
| 37 |
+
"York H. Dobyns Journal of Scientific Exploration, 1996 ..."
|
| 38 |
+
# 39 F1=0.400 missed=['title', 'publisher'] spurious=['author', 'orgName', 'title', 'orgName']
|
| 39 |
+
"Le Monde. 2016. “Opération Tulipe » : les coulisses de ..."
|
| 40 |
+
# 93 F1=0.545 missed=['publisher', 'biblScope'] spurious=['orgName', 'author', 'biblScope']
|
| 41 |
+
"Ministère des ressources naturelles (1996). L’énergie a..."
|
| 42 |
+
# 85 F1=0.571 missed=['title'] spurious=['author', 'orgName']
|
| 43 |
+
"PCC Access Points for Expressions Task Group. (2012). β..."
|
.local/kisski-batch1.progress
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
-
--- , βChiasmus in the New Testament.β In Ch: 43%|βββββ | 70/162 [21:29<24:34, 16.03s/rec, F1=0.750]
|
| 1 |
-
--- , βChiasmus in the New Testament.β In Ch: 44%|βββββ | 71/162 [21:40<22:00, 14.51s/rec, F1=0.750]
|
| 2 |
-
--- , βChiasmus in the New Testament.β In Ch: 44%|βββββ | 71/162 [21:40<22:00, 14.51s/rec, F1=0.778]
|
| 3 |
-
----- , and Haendel, V. (1983). The motives : 49%|βββββ | 79/162 [23:37<22:23, 16.18s/rec, F1=0.880]
|
| 4 |
-
----- , and Haendel, V. (1983). The motives : 49%|βββββ | 80/162 [24:04<26:33, 19.44s/rec, F1=0.880]
|
| 5 |
-
----- , and Haendel, V. (1983). The motives : 49%|βββββ | 80/162 [24:04<26:33, 19.44s/rec, F1=0.914]
|
| 6 |
-
, Β« Enjeux socio-culturels des discours amou: 68%|βββββββ | 110/162 [31:39<26:31, 30.60s/rec, F1=0.706]
|
| 7 |
-
, Β« Enjeux socio-culturels des discours amou: 69%|βββββββ | 111/162 [31:52<21:25, 25.20s/rec, F1=0.706]
|
| 8 |
-
, Β« Enjeux socio-culturels des discours amou: 69%|βββββββ | 111/162 [31:52<21:25, 25.20s/rec, F1=0.833]
|
| 9 |
-
Luke and the Law (Cambridge: Cambridge Unive: 72%|ββββββββ | 116/162 [33:29<14:45, 19.24s/rec, F1=0.800]
|
| 10 |
-
Luke and the Law (Cambridge: Cambridge Unive: 72%|ββββββββ | 117/162 [33:34<11:20, 15.12s/rec, F1=0.800]
|
| 11 |
-
Luke and the Law (Cambridge: Cambridge Unive: 72%|ββββββββ | 117/162 [33:34<11:20, 15.12s/rec, F1=0.889]
|
|
|
|
| 1 |
+
warning: `VIRTUAL_ENV=/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9` does not match the project environment path `.venv` and will be ignored; use `--active` to target the active environment instead
|
| 2 |
+
|
| 3 |
+
result = annotate(
|
| 4 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.local/kisski-batch10-t600.log
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
Provider : KISSKI / llama-3.3-70b-instruct
|
| 4 |
+
Gold file : tests/fixtures/blbl-examples.tei.xml
|
| 5 |
+
Records : 162 match-mode: text
|
| 6 |
+
Batch size: 10
|
| 7 |
+
GLiNER : disabled
|
| 8 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
|
| 10 |
+
Completed: 162/162 records
|
| 11 |
+
|
| 12 |
+
=== Overall — KISSKI / llama-3.3-70b-instruct ===
|
| 13 |
+
Micro P=0.869 R=0.839 F1=0.854 (TP=1357 FP=204 FN=261)
|
| 14 |
+
Macro P=0.652 R=0.612 F1=0.619
|
| 15 |
+
|
| 16 |
+
Per-element breakdown:
|
| 17 |
+
author P=0.741 R=0.818 F1=0.778 (TP=126 FP=44 FN=28)
|
| 18 |
+
biblScope P=0.840 R=0.771 F1=0.804 (TP=131 FP=25 FN=39)
|
| 19 |
+
date P=0.921 R=0.905 F1=0.913 (TP=152 FP=13 FN=16)
|
| 20 |
+
editor P=0.889 R=0.774 F1=0.828 (TP=24 FP=3 FN=7)
|
| 21 |
+
forename P=0.958 R=0.931 F1=0.945 (TP=299 FP=13 FN=22)
|
| 22 |
+
idno P=0.000 R=0.000 F1=0.000 (TP=0 FP=1 FN=2)
|
| 23 |
+
label P=0.286 R=0.182 F1=0.222 (TP=2 FP=5 FN=9)
|
| 24 |
+
note P=0.727 R=0.364 F1=0.485 (TP=8 FP=3 FN=14)
|
| 25 |
+
orgName P=0.333 R=0.727 F1=0.457 (TP=8 FP=16 FN=3)
|
| 26 |
+
page P=0.000 R=0.000 F1=0.000 (TP=0 FP=0 FN=1)
|
| 27 |
+
ptr P=0.667 R=0.400 F1=0.500 (TP=2 FP=1 FN=3)
|
| 28 |
+
pubPlace P=0.831 R=0.881 F1=0.855 (TP=59 FP=12 FN=8)
|
| 29 |
+
publisher P=0.820 R=0.769 F1=0.794 (TP=50 FP=11 FN=15)
|
| 30 |
+
surname P=0.978 R=0.966 F1=0.972 (TP=313 FP=7 FN=11)
|
| 31 |
+
title P=0.785 R=0.688 F1=0.733 (TP=183 FP=50 FN=83)
|
| 32 |
+
|
| 33 |
+
Lowest-F1 records (top 5):
|
| 34 |
+
#111 F1=0.000 missed=['author', 'title', 'title', 'biblScope', 'date', 'biblScope'] spurious=[]
|
| 35 |
+
"-, « Enjeux socio-culturels des discours amoureux dans ..."
|
| 36 |
+
# 97 F1=0.182 missed=['surname', 'forename', 'author', 'title', 'ptr'] spurious=['forename', 'surname', 'author', 'title']
|
| 37 |
+
"York H. Dobyns Journal of Scientific Exploration, 1996 ..."
|
| 38 |
+
# 93 F1=0.222 missed=['publisher', 'title', 'date', 'biblScope'] spurious=['orgName', 'author', 'title']
|
| 39 |
+
"Ministère des ressources naturelles (1996). L’énergie a..."
|
| 40 |
+
# 39 F1=0.250 missed=['title', 'title', 'publisher'] spurious=['orgName', 'author', 'title']
|
| 41 |
+
"Le Monde. 2016. “Opération Tulipe » : les coulisses de ..."
|
| 42 |
+
#152 F1=0.462 missed=['forename', 'surname', 'forename', 'surname', 'forename', 'surname', 'title'] spurious=['surname', 'forename', 'surname', 'forename', 'surname', 'forename', 'title']
|
| 43 |
+
"Irda Fidrianny, Dian Ayu, Rika Hartati. Antioxidant Cap..."
|
.local/kisski-batch10-t600.progress
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
-
|
| 1 |
-
|
| 2 |
-
--- , βChiasmus in the New Testament.β In Ch: 49%|βββββ | 80/162 [28:42<28:15, 20.68s/rec, F1=0.778]
|
| 3 |
-
, Β« Enjeux socio-culturels des discours amou: 68%|βββββββ | 110/162 [38:17<17:53, 20.64s/rec, F1=0.714]
|
| 4 |
-
, Β« Enjeux socio-culturels des discours amou: 74%|ββββββββ | 120/162 [41:33<14:14, 20.33s/rec, F1=0.714]
|
| 5 |
-
, Β« Enjeux socio-culturels des discours amou: 74%|ββββββββ | 120/162 [41:33<14:14, 20.33s/rec, F1=0.000]
|
|
|
|
| 1 |
+
warning: `VIRTUAL_ENV=/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9` does not match the project environment path `.venv` and will be ignored; use `--active` to target the active environment instead
|
| 2 |
+
|
| 3 |
+
annotated_text = inject_xml(plain_text, deduped)
|
| 4 |
+
|
| 5 |
+
annotated_text = inject_xml(plain_text, deduped)
|
| 6 |
+
|
|
|
|
|
|
|
|
|
|
|
|
.local/kisski-batch10.log
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
Provider : KISSKI / llama-3.3-70b-instruct
|
| 4 |
+
Gold file : tests/fixtures/blbl-examples.tei.xml
|
| 5 |
+
Records : 162 match-mode: text
|
| 6 |
+
Batch size: 10
|
| 7 |
+
GLiNER : disabled
|
| 8 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
|
| 10 |
+
[1-10/162] ERROR β The read operation timed out
|
| 11 |
+
|
| 12 |
+
[11-20/162] ERROR β The read operation timed out
|
| 13 |
+
|
| 14 |
+
[21-30/162] ERROR β The read operation timed out
|
| 15 |
+
|
| 16 |
+
[31-40/162] ERROR β The read operation timed out
|
| 17 |
+
|
| 18 |
+
[41-50/162] ERROR β The read operation timed out
|
| 19 |
+
|
| 20 |
+
[51-60/162] ERROR β The read operation timed out
|
| 21 |
+
|
| 22 |
+
[61-70/162] ERROR β The read operation timed out
|
| 23 |
+
|
| 24 |
+
[81-90/162] ERROR β The read operation timed out
|
| 25 |
+
|
| 26 |
+
[91-100/162] ERROR β The read operation timed out
|
| 27 |
+
|
| 28 |
+
[101-110/162] ERROR β The read operation timed out
|
| 29 |
+
|
| 30 |
+
[111-120/162] ERROR β The read operation timed out
|
| 31 |
+
|
| 32 |
+
[121-130/162] ERROR β The read operation timed out
|
| 33 |
+
|
| 34 |
+
[141-150/162] ERROR β The read operation timed out
|
| 35 |
+
|
| 36 |
+
[151-160/162] ERROR β The read operation timed out
|
| 37 |
+
|
| 38 |
+
Completed: 22/162 records (140 failed)
|
| 39 |
+
|
| 40 |
+
=== Overall — KISSKI / llama-3.3-70b-instruct ===
|
| 41 |
+
Micro P=0.877 R=0.811 F1=0.843 (TP=185 FP=26 FN=43)
|
| 42 |
+
Macro P=0.702 R=0.654 F1=0.672
|
| 43 |
+
|
| 44 |
+
Per-element breakdown:
|
| 45 |
+
author P=0.783 R=0.900 F1=0.837 (TP=18 FP=5 FN=2)
|
| 46 |
+
biblScope P=0.714 R=0.556 F1=0.625 (TP=10 FP=4 FN=8)
|
| 47 |
+
date P=1.000 R=1.000 F1=1.000 (TP=21 FP=0 FN=0)
|
| 48 |
+
editor P=1.000 R=0.667 F1=0.800 (TP=6 FP=0 FN=3)
|
| 49 |
+
forename P=0.979 R=0.979 F1=0.979 (TP=46 FP=1 FN=1)
|
| 50 |
+
label P=0.000 R=0.000 F1=0.000 (TP=0 FP=0 FN=1)
|
| 51 |
+
note P=0.000 R=0.000 F1=0.000 (TP=0 FP=0 FN=4)
|
| 52 |
+
orgName P=0.000 R=0.000 F1=0.000 (TP=0 FP=1 FN=0)
|
| 53 |
+
pubPlace P=0.846 R=0.917 F1=0.880 (TP=11 FP=2 FN=1)
|
| 54 |
+
publisher P=0.750 R=0.692 F1=0.720 (TP=9 FP=3 FN=4)
|
| 55 |
+
surname P=0.979 R=0.979 F1=0.979 (TP=46 FP=1 FN=1)
|
| 56 |
+
title P=0.667 R=0.500 F1=0.571 (TP=18 FP=9 FN=18)
|
| 57 |
+
|
| 58 |
+
Lowest-F1 records (top 5):
|
| 59 |
+
# 14 F1=0.000 missed=['title'] spurious=['title']
|
| 60 |
+
"26-Vellin J-F, Achim V, Sinardet D, et al. Rapidly deve..."
|
| 61 |
+
# 17 F1=0.545 missed=['publisher', 'title'] spurious=['orgName', 'author', 'title']
|
| 62 |
+
"McGrath, P. 2005 Toronto in the 1850s: A Transcription ..."
|
| 63 |
+
# 12 F1=0.667 missed=['title', 'biblScope', 'pubPlace'] spurious=['pubPlace', 'publisher']
|
| 64 |
+
"Bybee, Joan L. 2002. Cognitive processes in grammatical..."
|
| 65 |
+
# 9 F1=0.667 missed=['title', 'title', 'biblScope', 'publisher', 'title'] spurious=['title', 'biblScope', 'biblScope']
|
| 66 |
+
"Lillié, F., Analyse tectonique de Gisement Claude (Cluf..."
|
| 67 |
+
# 19 F1=0.667 missed=['title', 'biblScope', 'title', 'biblScope'] spurious=['title']
|
| 68 |
+
"Taitt, David. 1916. "Journal of David Taitt's Travels f..."
|
.local/kisski-batch10.progress
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
| 0 |
-
--- , βChiasmus in the New Testament.β In Ch: 43%|βββββ | 70/162 [14:00<18:24, 12.01s/rec]
|
| 1 |
-
--- , βChiasmus in the New Testament.β In Ch: 49%|βββββ | 80/162 [16:38<18:02, 13.21s/rec]
|
| 2 |
-
--- , βChiasmus in the New Testament.β In Ch: 49%|βββββ | 80/162 [16:38<18:02, 13.21s/rec, F1=0.947]
|
| 3 |
-
, Β« Enjeux socio-culturels des discours amou: 68%|βββββββ | 110/162 [22:38<10:44, 12.40s/rec, F1=0.947]
|
| 4 |
-
, Β« Enjeux socio-culturels des discours amou: 74%|ββββββββ | 120/162 [24:38<08:35, 12.28s/rec, F1=0.947]
|
|
|
|
| 1 |
+
warning: `VIRTUAL_ENV=/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9` does not match the project environment path `.venv` and will be ignored; use `--active` to target the active environment instead
|
| 2 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.local/kisski-batch162.log
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
Provider : KISSKI / llama-3.3-70b-instruct
|
| 4 |
+
Gold file : tests/fixtures/blbl-examples.tei.xml
|
| 5 |
+
Records : 162 match-mode: text
|
| 6 |
+
Batch size: 162
|
| 7 |
+
GLiNER : disabled
|
| 8 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
|
| 10 |
+
[1-162/162] ERROR β The read operation timed out
|
| 11 |
+
β All records failed β no results to report.
|
.local/kisski-batch162.progress
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
warning: `VIRTUAL_ENV=/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9` does not match the project environment path `.venv` and will be ignored; use `--active` to target the active environment instead
|
| 2 |
+
|
.local/kisski-batch50.log
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 3 |
+
Provider : KISSKI / llama-3.3-70b-instruct
|
| 4 |
+
Gold file : tests/fixtures/blbl-examples.tei.xml
|
| 5 |
+
Records : 162 match-mode: text
|
| 6 |
+
Batch size: 50
|
| 7 |
+
GLiNER : disabled
|
| 8 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
+
|
| 10 |
+
[1-50/162] ERROR β The read operation timed out
|
| 11 |
+
|
| 12 |
+
[51-100/162] ERROR β The read operation timed out
|
| 13 |
+
|
| 14 |
+
[101-150/162] ERROR β The read operation timed out
|
| 15 |
+
|
| 16 |
+
[151-162/162] ERROR β The read operation timed out
|
| 17 |
+
β All records failed β no results to report.
|
.local/kisski-batch50.progress
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
warning: `VIRTUAL_ENV=/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9` does not match the project environment path `.venv` and will be ignored; use `--active` to target the active environment instead
|
| 2 |
+
|
.pytest_cache/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created by pytest automatically.
|
| 2 |
+
*
|
.pytest_cache/CACHEDIR.TAG
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Signature: 8a477f597d28d172789f06886806bc55
|
| 2 |
+
# This file is a cache directory tag created by pytest.
|
| 3 |
+
# For information about cache directory tags, see:
|
| 4 |
+
# https://bford.info/cachedir/spec.html
|
.pytest_cache/README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pytest cache directory #
|
| 2 |
+
|
| 3 |
+
This directory contains data from the pytest's cache plugin,
|
| 4 |
+
which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
|
| 5 |
+
|
| 6 |
+
**Do not** commit this to version control.
|
| 7 |
+
|
| 8 |
+
See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
|
.pytest_cache/v/cache/lastfailed
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{}
|
.pytest_cache/v/cache/nodeids
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"tests/integration/test_pipeline_e2e.py::test_attributes_preserved_end_to_end",
|
| 3 |
+
"tests/integration/test_pipeline_e2e.py::test_context_longer_than_span_text",
|
| 4 |
+
"tests/integration/test_pipeline_e2e.py::test_fuzzy_context_match_flags_span",
|
| 5 |
+
"tests/integration/test_pipeline_e2e.py::test_hallucinated_context_span_rejected",
|
| 6 |
+
"tests/integration/test_pipeline_e2e.py::test_long_text_entity_in_second_chunk",
|
| 7 |
+
"tests/integration/test_pipeline_e2e.py::test_multiple_occurrences_disambiguated_by_context",
|
| 8 |
+
"tests/integration/test_pipeline_e2e.py::test_nested_spans_end_to_end",
|
| 9 |
+
"tests/integration/test_pipeline_e2e.py::test_plain_text_invariant_with_multiple_entities",
|
| 10 |
+
"tests/integration/test_pipeline_e2e.py::test_preexisting_xml_preserved",
|
| 11 |
+
"tests/test_builder.py::test_candidates_appear_in_prompt",
|
| 12 |
+
"tests/test_builder.py::test_correction_prompt_contains_original_response",
|
| 13 |
+
"tests/test_builder.py::test_empty_candidates_list_no_section",
|
| 14 |
+
"tests/test_builder.py::test_extraction_raises",
|
| 15 |
+
"tests/test_builder.py::test_json_enforced_prompt_contains_schema",
|
| 16 |
+
"tests/test_builder.py::test_json_enforced_prompt_shorter_than_text_gen",
|
| 17 |
+
"tests/test_builder.py::test_no_candidate_section_when_none",
|
| 18 |
+
"tests/test_builder.py::test_text_gen_prompt_contains_example",
|
| 19 |
+
"tests/test_builder.py::test_text_gen_prompt_contains_json_instruction",
|
| 20 |
+
"tests/test_builder.py::test_text_gen_prompt_contains_schema_elements",
|
| 21 |
+
"tests/test_builder.py::test_text_gen_prompt_contains_source_text",
|
| 22 |
+
"tests/test_chunker.py::test_chunk_boundary_does_not_split_xml_tag",
|
| 23 |
+
"tests/test_chunker.py::test_chunk_start_offsets_correct",
|
| 24 |
+
"tests/test_chunker.py::test_exact_chunk_size_no_overflow",
|
| 25 |
+
"tests/test_chunker.py::test_long_text_covers_all_characters",
|
| 26 |
+
"tests/test_chunker.py::test_long_text_multiple_chunks",
|
| 27 |
+
"tests/test_chunker.py::test_overlap_produces_repeated_content",
|
| 28 |
+
"tests/test_chunker.py::test_short_text_single_chunk",
|
| 29 |
+
"tests/test_evaluation.py::TestAggregate::test_aggregate_concatenates_lists",
|
| 30 |
+
"tests/test_evaluation.py::TestAggregate::test_aggregate_empty",
|
| 31 |
+
"tests/test_evaluation.py::TestAggregate::test_aggregate_sums_counts",
|
| 32 |
+
"tests/test_evaluation.py::TestCertLowUnionMatch::test_cert_low_unmatched_with_no_merger_is_fn",
|
| 33 |
+
"tests/test_evaluation.py::TestCertLowUnionMatch::test_merged_pred_scores_as_two_tps",
|
| 34 |
+
"tests/test_evaluation.py::TestCertLowUnionMatch::test_split_pred_still_matches_normally",
|
| 35 |
+
"tests/test_evaluation.py::TestComputeMetrics::test_all_wrong_element",
|
| 36 |
+
"tests/test_evaluation.py::TestComputeMetrics::test_empty_gold_and_pred",
|
| 37 |
+
"tests/test_evaluation.py::TestComputeMetrics::test_macro_vs_micro_differ_on_imbalanced",
|
| 38 |
+
"tests/test_evaluation.py::TestComputeMetrics::test_partial_precision",
|
| 39 |
+
"tests/test_evaluation.py::TestComputeMetrics::test_partial_recall",
|
| 40 |
+
"tests/test_evaluation.py::TestComputeMetrics::test_per_element_breakdown",
|
| 41 |
+
"tests/test_evaluation.py::TestComputeMetrics::test_perfect_prediction",
|
| 42 |
+
"tests/test_evaluation.py::TestComputeMetrics::test_report_returns_string",
|
| 43 |
+
"tests/test_evaluation.py::TestEvaluateBibl::test_attributes_not_required_for_text_match",
|
| 44 |
+
"tests/test_evaluation.py::TestEvaluateBibl::test_empty_annotation",
|
| 45 |
+
"tests/test_evaluation.py::TestEvaluateBibl::test_exact_match_mode",
|
| 46 |
+
"tests/test_evaluation.py::TestEvaluateBibl::test_missing_span_reduces_recall",
|
| 47 |
+
"tests/test_evaluation.py::TestEvaluateBibl::test_perfect_annotation",
|
| 48 |
+
"tests/test_evaluation.py::TestEvaluateBibl::test_plain_text_element",
|
| 49 |
+
"tests/test_evaluation.py::TestEvaluateBibl::test_spurious_span_reduces_precision",
|
| 50 |
+
"tests/test_evaluation.py::TestEvaluateBibl::test_wrong_element_type",
|
| 51 |
+
"tests/test_evaluation.py::TestEvaluateElement::test_ampersand_in_text_is_escaped",
|
| 52 |
+
"tests/test_evaluation.py::TestEvaluateElement::test_attributes_not_required_for_text_match",
|
| 53 |
+
"tests/test_evaluation.py::TestEvaluateElement::test_empty_annotation",
|
| 54 |
+
"tests/test_evaluation.py::TestEvaluateElement::test_exact_match_mode",
|
| 55 |
+
"tests/test_evaluation.py::TestEvaluateElement::test_missing_span_reduces_recall",
|
| 56 |
+
"tests/test_evaluation.py::TestEvaluateElement::test_perfect_annotation",
|
| 57 |
+
"tests/test_evaluation.py::TestEvaluateElement::test_plain_text_element",
|
| 58 |
+
"tests/test_evaluation.py::TestEvaluateElement::test_spurious_span_reduces_precision",
|
| 59 |
+
"tests/test_evaluation.py::TestEvaluateElement::test_wrong_element_type",
|
| 60 |
+
"tests/test_evaluation.py::TestExtractSpans::test_attributes_preserved",
|
| 61 |
+
"tests/test_evaluation.py::TestExtractSpans::test_element_with_text_and_tail",
|
| 62 |
+
"tests/test_evaluation.py::TestExtractSpans::test_flat_two_elements",
|
| 63 |
+
"tests/test_evaluation.py::TestExtractSpans::test_namespace_stripped",
|
| 64 |
+
"tests/test_evaluation.py::TestExtractSpans::test_nested_elements",
|
| 65 |
+
"tests/test_evaluation.py::TestExtractSpans::test_no_children",
|
| 66 |
+
"tests/test_evaluation.py::TestExtractSpans::test_normalized_text_property",
|
| 67 |
+
"tests/test_evaluation.py::TestExtractSpans::test_plain_text_equals_itertext",
|
| 68 |
+
"tests/test_evaluation.py::TestMatchSpans::test_both_empty",
|
| 69 |
+
"tests/test_evaluation.py::TestMatchSpans::test_empty_gold",
|
| 70 |
+
"tests/test_evaluation.py::TestMatchSpans::test_empty_pred",
|
| 71 |
+
"tests/test_evaluation.py::TestMatchSpans::test_exact_perfect_match",
|
| 72 |
+
"tests/test_evaluation.py::TestMatchSpans::test_exact_wrong_element",
|
| 73 |
+
"tests/test_evaluation.py::TestMatchSpans::test_exact_wrong_offset",
|
| 74 |
+
"tests/test_evaluation.py::TestMatchSpans::test_greedy_each_span_matched_once",
|
| 75 |
+
"tests/test_evaluation.py::TestMatchSpans::test_overlap_mode_above_threshold",
|
| 76 |
+
"tests/test_evaluation.py::TestMatchSpans::test_overlap_mode_below_threshold",
|
| 77 |
+
"tests/test_evaluation.py::TestMatchSpans::test_text_mode_different_text_no_match",
|
| 78 |
+
"tests/test_evaluation.py::TestMatchSpans::test_text_mode_matches_despite_offset_difference",
|
| 79 |
+
"tests/test_evaluation.py::TestMatchSpans::test_text_mode_normalises_whitespace",
|
| 80 |
+
"tests/test_injector.py::test_attrs_rendered_in_tag",
|
| 81 |
+
"tests/test_injector.py::test_build_nesting_tree_siblings",
|
| 82 |
+
"tests/test_injector.py::test_build_nesting_tree_simple",
|
| 83 |
+
"tests/test_injector.py::test_nested_spans",
|
| 84 |
+
"tests/test_injector.py::test_no_spans_returns_source",
|
| 85 |
+
"tests/test_injector.py::test_overlapping_spans_warns_and_skips",
|
| 86 |
+
"tests/test_injector.py::test_single_span",
|
| 87 |
+
"tests/test_injector.py::test_span_at_end_of_text",
|
| 88 |
+
"tests/test_injector.py::test_span_at_start_of_text",
|
| 89 |
+
"tests/test_injector.py::test_span_covering_entire_text",
|
| 90 |
+
"tests/test_injector.py::test_two_non_overlapping_spans",
|
| 91 |
+
"tests/test_parser.py::test_attrs_defaults_to_empty_dict",
|
| 92 |
+
"tests/test_parser.py::test_invalid_json_no_retry_raises",
|
| 93 |
+
"tests/test_parser.py::test_markdown_fenced_json_parsed",
|
| 94 |
+
"tests/test_parser.py::test_missing_fields_items_skipped",
|
| 95 |
+
"tests/test_parser.py::test_non_list_response_raises",
|
| 96 |
+
"tests/test_parser.py::test_retry_still_invalid_raises",
|
| 97 |
+
"tests/test_parser.py::test_retry_triggered_on_first_failure",
|
| 98 |
+
"tests/test_parser.py::test_strip_fences_json_lang",
|
| 99 |
+
"tests/test_parser.py::test_strip_fences_no_fences",
|
| 100 |
+
"tests/test_parser.py::test_strip_fences_no_lang",
|
| 101 |
+
"tests/test_parser.py::test_strip_fences_with_preamble",
|
| 102 |
+
"tests/test_parser.py::test_valid_json_parsed_directly",
|
| 103 |
+
"tests/test_pipeline.py::test_annotate_empty_response",
|
| 104 |
+
"tests/test_pipeline.py::test_annotate_escapes_bare_ampersand",
|
| 105 |
+
"tests/test_pipeline.py::test_annotate_fuzzy_spans_surfaced",
|
| 106 |
+
"tests/test_pipeline.py::test_annotate_no_text_modification",
|
| 107 |
+
"tests/test_pipeline.py::test_annotate_preserves_existing_entity_references",
|
| 108 |
+
"tests/test_pipeline.py::test_annotate_preserves_existing_xml",
|
| 109 |
+
"tests/test_pipeline.py::test_annotate_smoke",
|
| 110 |
+
"tests/test_pipeline.py::test_annotate_text_generation_endpoint",
|
| 111 |
+
"tests/test_pipeline.py::test_no_duplicate_tags_when_same_element_detected",
|
| 112 |
+
"tests/test_pipeline.py::test_overlapping_spans_from_chunks_are_merged",
|
| 113 |
+
"tests/test_resolver.py::test_attrs_preserved",
|
| 114 |
+
"tests/test_resolver.py::test_children_start_empty",
|
| 115 |
+
"tests/test_resolver.py::test_context_not_found_rejected",
|
| 116 |
+
"tests/test_resolver.py::test_direct_fallback_when_fuzzy_context_misses",
|
| 117 |
+
"tests/test_resolver.py::test_empty_span_list",
|
| 118 |
+
"tests/test_resolver.py::test_exact_context_match",
|
| 119 |
+
"tests/test_resolver.py::test_fuzzy_text_fallback_when_newline_space_mismatch",
|
| 120 |
+
"tests/test_resolver.py::test_multiple_spans_resolved",
|
| 121 |
+
"tests/test_resolver.py::test_source_slice_verified",
|
| 122 |
+
"tests/test_resolver.py::test_text_equals_context_with_whitespace_diff",
|
| 123 |
+
"tests/test_resolver.py::test_text_not_in_context_window_rejected",
|
| 124 |
+
"tests/test_tei.py::test_biblstruct_depth0_excludes_children",
|
| 125 |
+
"tests/test_tei.py::test_biblstruct_depth1_includes_children",
|
| 126 |
+
"tests/test_tei.py::test_biblstruct_description",
|
| 127 |
+
"tests/test_tei.py::test_biblstruct_direct_children_present",
|
| 128 |
+
"tests/test_tei.py::test_biblstruct_has_type_attribute",
|
| 129 |
+
"tests/test_tei.py::test_biblstruct_in_schema",
|
| 130 |
+
"tests/test_tei.py::test_biblstruct_model_group_children_expanded",
|
| 131 |
+
"tests/test_tei.py::test_create_schema_returns_tei_schema",
|
| 132 |
+
"tests/test_tei.py::test_create_schema_unknown_element_raises",
|
| 133 |
+
"tests/test_tei.py::test_idno_description",
|
| 134 |
+
"tests/test_tei.py::test_idno_in_schema",
|
| 135 |
+
"tests/test_tei.py::test_idno_self_referential_child",
|
| 136 |
+
"tests/test_tei.py::test_idno_type_attribute_with_allowed_values",
|
| 137 |
+
"tests/test_tei.py::test_no_duplicate_attributes_on_element",
|
| 138 |
+
"tests/test_tei.py::test_no_duplicate_elements_in_schema",
|
| 139 |
+
"tests/test_validator.py::test_empty_span_list",
|
| 140 |
+
"tests/test_validator.py::test_free_string_attribute_passes",
|
| 141 |
+
"tests/test_validator.py::test_invalid_attribute_value_rejected",
|
| 142 |
+
"tests/test_validator.py::test_leading_space_absorbed_into_span_boundary_normalises",
|
| 143 |
+
"tests/test_validator.py::test_leading_trailing_whitespace_stripped_both_sides",
|
| 144 |
+
"tests/test_validator.py::test_multiline_source_normalises_same",
|
| 145 |
+
"tests/test_validator.py::test_multiple_spaces_in_source_normalise",
|
| 146 |
+
"tests/test_validator.py::test_out_of_bounds_span_rejected",
|
| 147 |
+
"tests/test_validator.py::test_space_dropped_between_words_raises",
|
| 148 |
+
"tests/test_validator.py::test_tab_in_source_normalises",
|
| 149 |
+
"tests/test_validator.py::test_trailing_space_shifted_outside_span_normalises",
|
| 150 |
+
"tests/test_validator.py::test_unknown_attribute_rejected",
|
| 151 |
+
"tests/test_validator.py::test_unknown_element_rejected",
|
| 152 |
+
"tests/test_validator.py::test_valid_constrained_attribute_passes",
|
| 153 |
+
"tests/test_validator.py::test_valid_span_passes",
|
| 154 |
+
"tests/test_validator.py::test_validate_output_dropped_word_raises",
|
| 155 |
+
"tests/test_validator.py::test_validate_output_duplicated_word_raises",
|
| 156 |
+
"tests/test_validator.py::test_validate_output_empty_passes",
|
| 157 |
+
"tests/test_validator.py::test_validate_output_error_contains_diff",
|
| 158 |
+
"tests/test_validator.py::test_validate_output_multiple_tags_passes",
|
| 159 |
+
"tests/test_validator.py::test_validate_output_plain_source_passes",
|
| 160 |
+
"tests/test_validator.py::test_validate_output_tags_injected_passes",
|
| 161 |
+
"tests/test_validator.py::test_validate_output_whitespace_difference_passes"
|
| 162 |
+
]
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
.releaserc.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"branches": ["main"],
|
| 3 |
+
"plugins": [
|
| 4 |
+
"@semantic-release/commit-analyzer",
|
| 5 |
+
"@semantic-release/release-notes-generator",
|
| 6 |
+
[
|
| 7 |
+
"@semantic-release/changelog",
|
| 8 |
+
{
|
| 9 |
+
"changelogFile": "CHANGELOG.md"
|
| 10 |
+
}
|
| 11 |
+
],
|
| 12 |
+
[
|
| 13 |
+
"@semantic-release/exec",
|
| 14 |
+
{
|
| 15 |
+
"prepareCmd": "python scripts/version.py ${nextRelease.version}"
|
| 16 |
+
}
|
| 17 |
+
],
|
| 18 |
+
[
|
| 19 |
+
"@semantic-release/git",
|
| 20 |
+
{
|
| 21 |
+
"assets": [
|
| 22 |
+
"package.json",
|
| 23 |
+
"package-lock.json",
|
| 24 |
+
"pyproject.toml",
|
| 25 |
+
"uv.lock",
|
| 26 |
+
"tei_annotator/__init__.py",
|
| 27 |
+
"CHANGELOG.md"
|
| 28 |
+
],
|
| 29 |
+
"message": "chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}"
|
| 30 |
+
}
|
| 31 |
+
],
|
| 32 |
+
"@semantic-release/github"
|
| 33 |
+
]
|
| 34 |
+
}
|
CHANGELOG.md
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# [1.5.0](https://github.com/cboulanger/tei-annotator/compare/v1.4.0...v1.5.0) (2026-05-15)
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
### Bug Fixes
|
| 5 |
+
|
| 6 |
+
* drop schema-element tags from restore map to prevent invalid nesting ([f047302](https://github.com/cboulanger/tei-annotator/commit/f04730209b69d829eac2a62a50931100f84431f4)), closes [#2](https://github.com/cboulanger/tei-annotator/issues/2) [#4](https://github.com/cboulanger/tei-annotator/issues/4)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
### Features
|
| 10 |
+
|
| 11 |
+
* add interactive annotation debugger script ([42ee837](https://github.com/cboulanger/tei-annotator/commit/42ee837f329742ab69154b148f336e3778c43f9c))
|
| 12 |
+
|
| 13 |
+
# [1.4.0](https://github.com/cboulanger/tei-annotator/compare/v1.3.1...v1.4.0) (2026-05-15)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
### Features
|
| 17 |
+
|
| 18 |
+
* validate injected XML text content matches source after tag stripping ([c749627](https://github.com/cboulanger/tei-annotator/commit/c749627cb5520d834046816a3c60e18cf41063f4))
|
| 19 |
+
|
| 20 |
+
## [1.3.1](https://github.com/cboulanger/tei-annotator/compare/v1.3.0...v1.3.1) (2026-05-14)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
### Bug Fixes
|
| 24 |
+
|
| 25 |
+
* downgrade diagnostic log messages from INFO to DEBUG ([e3ca37f](https://github.com/cboulanger/tei-annotator/commit/e3ca37fd260f318367062f0722a802f7422385a3)), closes [#2](https://github.com/cboulanger/tei-annotator/issues/2)
|
| 26 |
+
|
| 27 |
+
# [1.3.0](https://github.com/cboulanger/tei-annotator/compare/v1.2.0...v1.3.0) (2026-05-14)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
### Features
|
| 31 |
+
|
| 32 |
+
* add INFO-level pipeline diagnostics for issue [#2](https://github.com/cboulanger/tei-annotator/issues/2) debugging ([81dc61e](https://github.com/cboulanger/tei-annotator/commit/81dc61ec435c7bfcb1b0ea036de8b1a475e7ada3))
|
| 33 |
+
|
| 34 |
+
# [1.2.0](https://github.com/cboulanger/tei-annotator/compare/v1.1.1...v1.2.0) (2026-05-14)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
### Features
|
| 38 |
+
|
| 39 |
+
* add warning for span resolver context mismatches ([11fc401](https://github.com/cboulanger/tei-annotator/commit/11fc401dd0ca172306b94f7a0743d22d5a63f5a3)), closes [#2](https://github.com/cboulanger/tei-annotator/issues/2)
|
| 40 |
+
|
| 41 |
+
## [1.1.1](https://github.com/cboulanger/tei-annotator/compare/v1.1.0...v1.1.1) (2026-05-14)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
### Bug Fixes
|
| 45 |
+
|
| 46 |
+
* merge overlapping spans from chunks to prevent text reordering ([4b612a1](https://github.com/cboulanger/tei-annotator/commit/4b612a1fb5c7fe0114580b453cb0f5c8e7d52ab2)), closes [#2](https://github.com/cboulanger/tei-annotator/issues/2)
|
| 47 |
+
|
| 48 |
+
# [1.1.0](https://github.com/cboulanger/tei-annotator/compare/v1.0.0...v1.1.0) (2026-05-14)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
### Features
|
| 52 |
+
|
| 53 |
+
* **webservice:** fix timeout handling, reduce default LLM timeout to 60s ([d499dab](https://github.com/cboulanger/tei-annotator/commit/d499dabfbc04e3cb94aa34512b5b8d782e69c82b))
|
| 54 |
+
|
| 55 |
+
# 1.0.0 (2026-05-14)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
### Bug Fixes
|
| 59 |
+
|
| 60 |
+
* add `@spaces.GPU` decorator to satisfy ZeroGPU spaces check; graceful fallback when spaces not installed ([40d8c92](https://github.com/cboulanger/tei-annotator/commit/40d8c92be6089d52b468f9004582e9f21e7759b7))
|
| 61 |
+
* Add back gemini 2.0 flash model ([e7ad4b5](https://github.com/cboulanger/tei-annotator/commit/e7ad4b5e5334628e95851c0c9a02d53af04a0b44))
|
| 62 |
+
* Add rate limiter to Kisski connector ([cfaba49](https://github.com/cboulanger/tei-annotator/commit/cfaba49b966649d17401ea5af06c995f7ceda375))
|
| 63 |
+
* catch exceptions in do_evaluate to show error in UI instead of crashing ZeroGPU runtime ([82b98f7](https://github.com/cboulanger/tei-annotator/commit/82b98f75f16cfcc739133624e9074c34ca025d94))
|
| 64 |
+
* disable SSR mode in Gradio launch to prevent Node.js server crash on HF Spaces ([01465bb](https://github.com/cboulanger/tei-annotator/commit/01465bbc41a7cb7ebca30c566fd1cf659a933ef9))
|
| 65 |
+
* escape bare & in text nodes without double-encoding existing entities ([7a987f8](https://github.com/cboulanger/tei-annotator/commit/7a987f8743a595aadaeb3050c23fcc606053e834))
|
| 66 |
+
* explicitly set hardware: cpu-basic in Space metadata to suppress spaces.GPU check ([3c798ff](https://github.com/cboulanger/tei-annotator/commit/3c798ff051e7d404c85efa029213cde3a4f8a342))
|
| 67 |
+
* Fix config files ([0b7260e](https://github.com/cboulanger/tei-annotator/commit/0b7260eaa7ac5a61a50ce5c51e0f3f2a316565e1))
|
| 68 |
+
* increase `@spaces.GPU` timeout to 300s to avoid GPU task abort on slow LLM calls ([a395ade](https://github.com/cboulanger/tei-annotator/commit/a395adecb93fee97c78579fc15b2129f7bd77a1d))
|
| 69 |
+
* Increase timeout ([4006797](https://github.com/cboulanger/tei-annotator/commit/4006797de91175d610ac26ea5f8e0cacbd317275))
|
| 70 |
+
* prompt rule improvements from 2026-05-08 evaluation experiments ([d26a27c](https://github.com/cboulanger/tei-annotator/commit/d26a27c9c5b9d92e2855636c1d2ceddd0d5aea82))
|
| 71 |
+
* remove local package install from requirements.txt (HF Spaces copies source directly) ([abc28a1](https://github.com/cboulanger/tei-annotator/commit/abc28a1feb52de3989fb1a6e0b83dd54081f32c9))
|
| 72 |
+
* rename EvaluateRequest.schema → schema_id, guard response construction, raise keepalive ([1c4c16a](https://github.com/cboulanger/tei-annotator/commit/1c4c16aeec1afc37180ed6cb8b3ad5efbe7e3912))
|
| 73 |
+
* replace editable install (-e .) with plain . in requirements.txt for HF Spaces compatibility ([0dcfc4c](https://github.com/cboulanger/tei-annotator/commit/0dcfc4c840ff72ea99f24c7d5188e48dee6e9359))
|
| 74 |
+
* sync batch size with sample size in gradio app ([1659d98](https://github.com/cboulanger/tei-annotator/commit/1659d98a3820a251581bf3c7217aff44c6b7abf2))
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
### Features
|
| 78 |
+
|
| 79 |
+
* Add batch size configuration in api and frontends ([b530e33](https://github.com/cboulanger/tei-annotator/commit/b530e336135d488aa77fb37d24da07346f993941))
|
| 80 |
+
* Add Gradio app for HF Spaces deployment ([331a802](https://github.com/cboulanger/tei-annotator/commit/331a80280d3a409237da960383ea7532698a19ae))
|
| 81 |
+
* Add registry to support any kind of inference provider ([f65b650](https://github.com/cboulanger/tei-annotator/commit/f65b6501074293efba4c9597f860e0b49874b997))
|
| 82 |
+
* Add security against malicious clients ([a1785f7](https://github.com/cboulanger/tei-annotator/commit/a1785f73c2c4212a6fec304217f2cce801e9c629))
|
| 83 |
+
* Add webservice for demonstration ([c3f33b6](https://github.com/cboulanger/tei-annotator/commit/c3f33b679e71429229bde937ae38a36689c7da53))
|
| 84 |
+
* cert="low" uncertain-boundary evaluation mechanism ([6826850](https://github.com/cboulanger/tei-annotator/commit/68268504ee31c64530bcc8e0fed9fb0bf816e08c))
|
| 85 |
+
* collect_hard_examples.py — find challenging gold examples via mini-batch evaluation ([ccfad34](https://github.com/cboulanger/tei-annotator/commit/ccfad34450269f03d4c17993081a6606a24c1e02))
|
| 86 |
+
* separate evaluation corpora from tests; add schema/corpus selection to webservice ([796d53e](https://github.com/cboulanger/tei-annotator/commit/796d53ee0f9c8a71675f57403436d46eff02453e))
|
| 87 |
+
* Support more providers ([18a607c](https://github.com/cboulanger/tei-annotator/commit/18a607c518e3c18a56948169cf31b3df6395ca44))
|
| 88 |
+
* **webservice:** show-examples mode, model status indicators, hard LLM timeout ([90e4492](https://github.com/cboulanger/tei-annotator/commit/90e4492189940156bbeb84158663b1eefc3be2bb))
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
### Performance Improvements
|
| 92 |
+
|
| 93 |
+
* cache blbl schema at module load instead of rebuilding per request ([4ba8656](https://github.com/cboulanger/tei-annotator/commit/4ba865620dd719b9972bbed58668a3240cba1d33))
|
CLAUDE.md
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CLAUDE.md
|
| 2 |
+
|
| 3 |
+
## Package manager
|
| 4 |
+
|
| 5 |
+
Uses `uv`. Run tests with `uv run pytest`. Install deps with `uv sync` (add `--extra gliner` or `--extra webservice` for optional extras). API keys go in `.env` (copy from `.env.template`).
|
| 6 |
+
|
| 7 |
+
`gh` is available for GitHub operations (issues, PRs, etc.).
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## Project layout
|
| 12 |
+
|
| 13 |
+
```
|
| 14 |
+
tei_annotator/ core library
|
| 15 |
+
models/ TEIAttribute, TEIElement, TEISchema; SpanDescriptor, ResolvedSpan
|
| 16 |
+
inference/ EndpointConfig, EndpointCapability
|
| 17 |
+
chunking/ chunk_text()
|
| 18 |
+
detection/ detect_spans() — GLiNER pre-detection (needs [gliner] extra)
|
| 19 |
+
prompting/ build_prompt(), make_correction_prompt(); Jinja2 templates
|
| 20 |
+
postprocessing/ parse_response(), resolve_spans(), validate_spans(), inject_xml()
|
| 21 |
+
schemas/ build_bibl_schema(), build_bibl_reference_segmenter_schema()
|
| 22 |
+
registry.py SCHEMA_REGISTRY — maps schema key → build fn + root/child elements
|
| 23 |
+
providers/ LLM connectors: hf / gemini / kisski / openai / claude
|
| 24 |
+
evaluation/ EvaluationSpan, extract_spans(), compute_metrics(), evaluate_file()
|
| 25 |
+
pipeline.py annotate() — top-level entry point
|
| 26 |
+
tei.py create_schema() — parse RNG → TEISchema
|
| 27 |
+
|
| 28 |
+
scripts/
|
| 29 |
+
evaluate_llm.py run any provider against a gold-standard TEI file
|
| 30 |
+
debug_annotation.py step-by-step pipeline debug for a single text snippet
|
| 31 |
+
smoke_test_llm.py quick connectivity check
|
| 32 |
+
smoke_test_webservice.py
|
| 33 |
+
|
| 34 |
+
tests/
|
| 35 |
+
test_*.py unit tests (fully mocked, < 0.5 s) — run with: uv run pytest
|
| 36 |
+
integration/ real GLiNER / end-to-end tests (excluded from CI by default)
|
| 37 |
+
|
| 38 |
+
data/
|
| 39 |
+
corpus/ git-tracked gold-standard TEI corpora (bibl.default.tei.xml, etc.)
|
| 40 |
+
raw/ gitignored raw source batches and collected hard examples
|
| 41 |
+
|
| 42 |
+
webservice/ FastAPI JSON API + browser UI
|
| 43 |
+
|
| 44 |
+
docs/ see Documentation section below
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
---
|
| 48 |
+
|
| 49 |
+
## Key design rules
|
| 50 |
+
|
| 51 |
+
- The LLM prompt talks about **spans** (emit a span / cover a span), never XML tags. Schema descriptions must match this vocabulary.
|
| 52 |
+
- `SpanDescriptor` is always **flat** — no nesting. `ResolvedSpan.children` is populated later by the injector.
|
| 53 |
+
- Source text is **never modified** by any model call.
|
| 54 |
+
- Cross-element constraints belong in `TEISchema.rules` (rendered as numbered "General Rules" before element descriptions), not duplicated inside individual element descriptions.
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
## Debugging annotation bugs
|
| 59 |
+
|
| 60 |
+
When a text snippet is annotated incorrectly, run `debug_annotation.py` **before**
|
| 61 |
+
touching any code. It executes the full pipeline step-by-step and prints every
|
| 62 |
+
intermediate result so you can pinpoint exactly where accuracy is lost.
|
| 63 |
+
|
| 64 |
+
```bash
|
| 65 |
+
uv run scripts/debug_annotation.py --text "<failing snippet>"
|
| 66 |
+
# pass --show-prompt to inspect the full LLM prompt
|
| 67 |
+
# pass --provider / --model to test a different model
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
**Read the output top-to-bottom and identify the first stage where the problem
|
| 71 |
+
appears:**
|
| 72 |
+
|
| 73 |
+
| Stage | What to look for | Likely fix |
|
| 74 |
+
| --- | --- | --- |
|
| 75 |
+
| **Parsed spans** | LLM emitted the wrong element, wrong text, or missing span | Improve the element description or schema rules |
|
| 76 |
+
| **Resolved spans** | Span parsed correctly but not resolved (context mismatch) | LLM's context string doesn't match source — improve prompt or context instructions |
|
| 77 |
+
| **Validated spans** | Resolved but rejected (unknown element / bad attribute value) | Schema element name or attribute value list is wrong |
|
| 78 |
+
| **Final XML** | All spans correct but XML is malformed or nesting is wrong | `inject_xml` / injector issue |
|
| 79 |
+
|
| 80 |
+
Only fix schema descriptions or rules (in `tei_annotator/schemas/`) to address
|
| 81 |
+
**Parsed spans** problems. Do not patch the pipeline code for prompt-quality issues.
|
| 82 |
+
After changing schema descriptions, re-run the debugger on the same snippet to
|
| 83 |
+
confirm the fix, then run the evaluator to check for regressions.
|
| 84 |
+
|
| 85 |
+
---
|
| 86 |
+
|
| 87 |
+
## Running the evaluator
|
| 88 |
+
|
| 89 |
+
```bash
|
| 90 |
+
# quick run: 5 records, gemini, bibl-reference-segmenter schema
|
| 91 |
+
uv run scripts/evaluate_llm.py \
|
| 92 |
+
--provider gemini --schema bibl-reference-segmenter --max-items 5 --verbose
|
| 93 |
+
|
| 94 |
+
# re-run only failing records
|
| 95 |
+
uv run scripts/evaluate_llm.py --verbose --match-mode overlap \
|
| 96 |
+
--grep "Creed|Robins" --provider kisski
|
| 97 |
+
|
| 98 |
+
# all providers, all records
|
| 99 |
+
uv run scripts/evaluate_llm.py --schema bibl --output-file results.txt
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
Key flags: `--provider`, `--model`, `--schema`, `--gold-file`, `--max-items`,
|
| 103 |
+
`--batch-size`, `--match-mode`, `--verbose`, `--grep`, `--shuffle`, `--timeout`.
|
| 104 |
+
|
| 105 |
+
---
|
| 106 |
+
|
| 107 |
+
## Skills
|
| 108 |
+
|
| 109 |
+
**`/optimize-element-descriptions`** — iterative workflow for improving schema prompt rules and element descriptions to maximise F1 against a gold standard. Includes guidance for handling genuinely ambiguous gold boundaries via `cert="low"`. See [.claude/skills/optimize-element-descriptions/SKILL.md](.claude/skills/optimize-element-descriptions/SKILL.md).
|
| 110 |
+
|
| 111 |
+
---
|
| 112 |
+
|
| 113 |
+
## Documentation
|
| 114 |
+
|
| 115 |
+
### Module READMEs
|
| 116 |
+
|
| 117 |
+
| Path | Topic |
|
| 118 |
+
|------|-------|
|
| 119 |
+
| [tei_annotator/models/README.md](tei_annotator/models/README.md) | TEISchema, TEIElement, TEIAttribute; SpanDescriptor, ResolvedSpan |
|
| 120 |
+
| [tei_annotator/detection/README.md](tei_annotator/detection/README.md) | GLiNER pre-detection |
|
| 121 |
+
| [tei_annotator/chunking/README.md](tei_annotator/chunking/README.md) | Text chunking strategy |
|
| 122 |
+
| [tei_annotator/prompting/README.md](tei_annotator/prompting/README.md) | Prompt templates and builder |
|
| 123 |
+
| [tei_annotator/inference/README.md](tei_annotator/inference/README.md) | EndpointConfig; provider setup examples |
|
| 124 |
+
| [tei_annotator/postprocessing/README.md](tei_annotator/postprocessing/README.md) | Parse → resolve → validate → inject pipeline |
|
| 125 |
+
| [tei_annotator/schemas/README.md](tei_annotator/schemas/README.md) | Built-in schemas, registry, adding a new schema |
|
| 126 |
+
| [tei_annotator/providers/README.md](tei_annotator/providers/README.md) | LLM connectors, adding a new provider |
|
| 127 |
+
| [tei_annotator/evaluation/README.md](tei_annotator/evaluation/README.md) | Evaluation flow, match modes, metrics, `cert="low"` uncertain-boundary handling |
|
| 128 |
+
| [webservice/README.md](webservice/README.md) | FastAPI webservice setup and API |
|
| 129 |
+
|
| 130 |
+
### Guides
|
| 131 |
+
|
| 132 |
+
| Path | Topic |
|
| 133 |
+
|------|-------|
|
| 134 |
+
| [docs/tei-element-descriptions.md](docs/tei-element-descriptions.md) | Evidence-based guidelines for writing effective TEIElement descriptions |
|
| 135 |
+
|
| 136 |
+
### Experiments
|
| 137 |
+
|
| 138 |
+
| Path | Summary |
|
| 139 |
+
|------|---------|
|
| 140 |
+
| [docs/experiments/evaluation-results.md](docs/experiments/evaluation-results.md) | Running evaluation results table across models and schemas |
|
| 141 |
+
| [docs/experiments/batch-annotation-experiment.md](docs/experiments/batch-annotation-experiment.md) | Batching multiple records per LLM call to reduce latency |
|
| 142 |
+
| [docs/experiments/2026-05-08-gemini-kisski-bibl-refseg.md](docs/experiments/2026-05-08-gemini-kisski-bibl-refseg.md) | Gemini 2.0 Flash vs KISSKI/Qwen3-Coder on bibl and bibl-reference-segmenter |
|
| 143 |
+
| [docs/experiments/2026-05-08-kisski-model-comparison-bibl-refseg.md](docs/experiments/2026-05-08-kisski-model-comparison-bibl-refseg.md) | KISSKI 4-model comparison on bibl-reference-segmenter |
|
| 144 |
+
|
| 145 |
+
### History
|
| 146 |
+
|
| 147 |
+
| Path | Topic |
|
| 148 |
+
|------|-------|
|
| 149 |
+
| [docs/history/implementation-plan.md](docs/history/implementation-plan.md) | Original design and implementation plan (historical) |
|
README.md
CHANGED
|
@@ -9,6 +9,7 @@ pinned: false
|
|
| 9 |
license: mit
|
| 10 |
short_description: Annotate plain text with TEI XML tags using an LLM backend
|
| 11 |
---
|
|
|
|
| 12 |
A Python library for annotating plain text with [TEI XML](https://tei-c.org/) tags using a two-stage LLM pipeline.
|
| 13 |
|
| 14 |
1. **(Optional) GLiNER pre-detection** — fast CPU-based span labelling generates candidates for the LLM to verify and extend.
|
|
@@ -216,7 +217,6 @@ FINAL OUTPUT (annotated XML)
|
|
| 216 |
## Demo and webservice
|
| 217 |
|
| 218 |
- **HuggingFace demo:** <https://huggingface.co/spaces/cmboulanger/tei-annotator>
|
| 219 |
-
- **`app.py`** β Gradio app for HuggingFace Spaces. See [docs/huggingface-deployment.md](docs/huggingface-deployment.md).
|
| 220 |
- **`webservice/`** — FastAPI JSON API + browser UI, all five providers. See [webservice/README.md](webservice/README.md).
|
| 221 |
|
| 222 |
---
|
|
|
|
| 9 |
license: mit
|
| 10 |
short_description: Annotate plain text with TEI XML tags using an LLM backend
|
| 11 |
---
|
| 12 |
+
|
| 13 |
A Python library for annotating plain text with [TEI XML](https://tei-c.org/) tags using a two-stage LLM pipeline.
|
| 14 |
|
| 15 |
1. **(Optional) GLiNER pre-detection** — fast CPU-based span labelling generates candidates for the LLM to verify and extend.
|
|
|
|
| 217 |
## Demo and webservice
|
| 218 |
|
| 219 |
- **HuggingFace demo:** <https://huggingface.co/spaces/cmboulanger/tei-annotator>
|
|
|
|
| 220 |
- **`webservice/`** — FastAPI JSON API + browser UI, all five providers. See [webservice/README.md](webservice/README.md).
|
| 221 |
|
| 222 |
---
|
package-lock.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
package.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "tei-annotator",
|
| 3 |
+
"version": "1.5.0",
|
| 4 |
+
"description": "TEI XML annotation library using LLM pipelines",
|
| 5 |
+
"license": "MIT",
|
| 6 |
+
"private": true,
|
| 7 |
+
"repository": {
|
| 8 |
+
"type": "git",
|
| 9 |
+
"url": "https://github.com/cboulanger/tei-annotator.git"
|
| 10 |
+
},
|
| 11 |
+
"devDependencies": {
|
| 12 |
+
"@commitlint/cli": "^18.4.0",
|
| 13 |
+
"@commitlint/config-conventional": "^18.4.0",
|
| 14 |
+
"@semantic-release/changelog": "^6.0.3",
|
| 15 |
+
"@semantic-release/exec": "^6.0.3",
|
| 16 |
+
"@semantic-release/git": "^10.0.1",
|
| 17 |
+
"commitizen": "^4.3.0",
|
| 18 |
+
"cz-conventional-changelog": "^3.3.0",
|
| 19 |
+
"husky": "^8.0.3",
|
| 20 |
+
"semantic-release": "^22.0.0"
|
| 21 |
+
},
|
| 22 |
+
"config": {
|
| 23 |
+
"commitizen": {
|
| 24 |
+
"path": "./node_modules/cz-conventional-changelog"
|
| 25 |
+
}
|
| 26 |
+
},
|
| 27 |
+
"scripts": {
|
| 28 |
+
"test": "uv run pytest",
|
| 29 |
+
"semantic-release": "semantic-release",
|
| 30 |
+
"commit": "cz"
|
| 31 |
+
}
|
| 32 |
+
}
|
pyproject.toml
CHANGED
|
@@ -18,7 +18,6 @@ webservice = [
|
|
| 18 |
"uvicorn[standard]>=0.30",
|
| 19 |
"python-multipart>=0.0.9",
|
| 20 |
]
|
| 21 |
-
gradio = ["gradio>=6.9"]
|
| 22 |
|
| 23 |
[tool.pytest.ini_options]
|
| 24 |
addopts = "-m 'not integration'"
|
|
@@ -29,7 +28,6 @@ markers = [
|
|
| 29 |
[tool.taskipy.tasks]
|
| 30 |
test = "uv run pytest"
|
| 31 |
webservice = "uv run python webservice/main.py"
|
| 32 |
-
gradio = "uv run python app.py"
|
| 33 |
|
| 34 |
[dependency-groups]
|
| 35 |
dev = [
|
|
|
|
| 18 |
"uvicorn[standard]>=0.30",
|
| 19 |
"python-multipart>=0.0.9",
|
| 20 |
]
|
|
|
|
| 21 |
|
| 22 |
[tool.pytest.ini_options]
|
| 23 |
addopts = "-m 'not integration'"
|
|
|
|
| 28 |
[tool.taskipy.tasks]
|
| 29 |
test = "uv run pytest"
|
| 30 |
webservice = "uv run python webservice/main.py"
|
|
|
|
| 31 |
|
| 32 |
[dependency-groups]
|
| 33 |
dev = [
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HuggingFace Spaces — install the package and its gradio extra.
|
| 2 |
+
# Spaces reads this file automatically; no pyproject.toml extras support needed.
|
| 3 |
+
gradio>=6.9
|
| 4 |
+
jinja2>=3.1
|
| 5 |
+
lxml>=5.0
|
| 6 |
+
python-dotenv>=1.2.2
|
| 7 |
+
rapidfuzz>=3.0
|
schema/tei-bib.rng
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tei_annotator/providers/README.md
CHANGED
|
@@ -65,7 +65,7 @@ _ALL_CONNECTORS: list[Connector] = [
|
|
| 65 |
]
|
| 66 |
```
|
| 67 |
|
| 68 |
-
That's all. The evaluate script
|
| 69 |
|
| 70 |
---
|
| 71 |
|
|
|
|
| 65 |
]
|
| 66 |
```
|
| 67 |
|
| 68 |
+
That's all. The evaluate script and webservice pick it up automatically.
|
| 69 |
|
| 70 |
---
|
| 71 |
|
webservice/nginx.conf
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Nginx reverse-proxy configuration for tei-annotator webservice.
|
| 2 |
+
#
|
| 3 |
+
# All routes are publicly accessible over HTTPS.
|
| 4 |
+
#
|
| 5 |
+
# Setup:
|
| 6 |
+
# 1. sudo cp webservice/nginx.conf /etc/nginx/sites-available/tei-annotator
|
| 7 |
+
# 2. sudo ln -s /etc/nginx/sites-available/tei-annotator /etc/nginx/sites-enabled/
|
| 8 |
+
# 3. Replace YOUR_DOMAIN with your actual domain (4 occurrences in the directives below):
|
| 9 |
+
# sudo sed -i 's/YOUR_DOMAIN/your.domain.example/g' /etc/nginx/sites-available/tei-annotator
|
| 10 |
+
# 4. Install certbot if needed:
|
| 11 |
+
# sudo apt install certbot python3-certbot-nginx # Debian/Ubuntu
|
| 12 |
+
# sudo dnf install certbot python3-certbot-nginx # RHEL/Fedora
|
| 13 |
+
# 5. Obtain a Let's Encrypt certificate using --standalone (nginx must be stopped
|
| 14 |
+
# first because the cert does not exist yet and nginx refuses to start with
|
| 15 |
+
# missing ssl_certificate paths — bootstrap chicken-and-egg):
|
| 16 |
+
# sudo systemctl stop nginx
|
| 17 |
+
# sudo certbot certonly --standalone -d your.domain.example
|
| 18 |
+
# sudo systemctl start nginx
|
| 19 |
+
# 6. Verify nginx is running and auto-renewal works:
|
| 20 |
+
# sudo systemctl status nginx
|
| 21 |
+
# sudo certbot renew --dry-run
|
| 22 |
+
#
|
| 23 |
+
# To run the webservice as a systemd service, see webservice/tei-annotator.service.
|
| 24 |
+
|
| 25 |
+
# Rate limit: 6 requests/minute per IP with a burst of 10.
|
| 26 |
+
# Covers normal interactive use; blocks scripted automation.
|
| 27 |
+
limit_req_zone $binary_remote_addr zone=tei_api:10m rate=6r/m;
|
| 28 |
+
|
| 29 |
+
upstream tei_annotator {
|
| 30 |
+
server 127.0.0.1:8099;
|
| 31 |
+
keepalive 16;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
# ── HTTP: redirect everything to HTTPS ───────────────────────────────────────
|
| 35 |
+
server {
|
| 36 |
+
listen 80;
|
| 37 |
+
listen [::]:80;
|
| 38 |
+
server_name YOUR_DOMAIN;
|
| 39 |
+
|
| 40 |
+
# Let certbot's ACME challenge through, redirect everything else
|
| 41 |
+
location /.well-known/acme-challenge/ {
|
| 42 |
+
root /var/www/certbot;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
location / {
|
| 46 |
+
return 301 https://$host$request_uri;
|
| 47 |
+
}
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
# ── HTTPS ────────────────────────────────────────────────────────────────────
|
| 51 |
+
server {
|
| 52 |
+
listen 443 ssl http2;
|
| 53 |
+
listen [::]:443 ssl http2;
|
| 54 |
+
server_name YOUR_DOMAIN;
|
| 55 |
+
|
| 56 |
+
# Default paths written by certbot (same for --standalone, --nginx or --webroot); update if using a
|
| 57 |
+
# different certificate tool or path.
|
| 58 |
+
ssl_certificate /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem;
|
| 59 |
+
ssl_certificate_key /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem;
|
| 60 |
+
|
| 61 |
+
# Modern TLS settings
|
| 62 |
+
ssl_protocols TLSv1.2 TLSv1.3;
|
| 63 |
+
ssl_prefer_server_ciphers on;
|
| 64 |
+
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305;
|
| 65 |
+
ssl_session_cache shared:SSL:10m;
|
| 66 |
+
ssl_session_timeout 1d;
|
| 67 |
+
add_header Strict-Transport-Security "max-age=63072000" always;
|
| 68 |
+
|
| 69 |
+
# Reject request bodies larger than 64 KB to cap token usage.
|
| 70 |
+
client_max_body_size 64k;
|
| 71 |
+
|
| 72 |
+
location / {
|
| 73 |
+
limit_req zone=tei_api burst=10 nodelay;
|
| 74 |
+
limit_req_status 429;
|
| 75 |
+
|
| 76 |
+
proxy_pass http://tei_annotator;
|
| 77 |
+
proxy_set_header Host $host;
|
| 78 |
+
proxy_set_header X-Real-IP $remote_addr;
|
| 79 |
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
| 80 |
+
proxy_set_header X-Forwarded-Proto $scheme;
|
| 81 |
+
|
| 82 |
+
proxy_buffering off;
|
| 83 |
+
proxy_read_timeout 360s;
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
|