nemoretriever-ocr-v1 / example.py
BoLiu's picture
update SPDX and license
e05eed1
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import argparse
from nemo_retriever_ocr.inference.pipeline import NemoRetrieverOCR
def main(image_path, merge_level, no_visualize, model_dir):
    """Run the OCR pipeline on a single image and report the region count.

    Args:
        image_path: Path to the input image; handed to the pipeline unchanged.
        merge_level: Forwarded to the pipeline call as ``merge_level``.
        no_visualize: When true, the pipeline is called with
            ``visualize=False``; otherwise with ``visualize=True``.
        model_dir: Directory containing the model checkpoints; forwarded to
            the ``NemoRetrieverOCR`` constructor.
    """
    # Forward the checkpoint directory so a caller-supplied location is
    # honoured; building the pipeline with no arguments ignored this value.
    # NOTE(review): assumes the constructor accepts a ``model_dir`` keyword --
    # confirm against nemo_retriever_ocr.inference.pipeline.
    ocr_pipeline = NemoRetrieverOCR(model_dir=model_dir)
    predictions = ocr_pipeline(image_path, merge_level=merge_level, visualize=not no_visualize)
    print(f"Found {len(predictions)} text regions.")
if __name__ == "__main__":
    # Command-line entry point: declare the options, parse them, and hand
    # the parsed values to main().
    merge_levels = ["word", "sentence", "paragraph"]

    cli = argparse.ArgumentParser(description="Run OCR inference and annotate image.")
    cli.add_argument("image_path", help="Path to the input image.", type=str)
    cli.add_argument(
        "--merge-level",
        default="paragraph",
        choices=merge_levels,
        type=str,
        help="Merge level for OCR output (word, sentence, paragraph).",
    )
    cli.add_argument("--no-visualize", help="Do not save the annotated image.", action="store_true")
    cli.add_argument(
        "--model-dir",
        default="./checkpoints",
        type=str,
        help="Path to the model checkpoints.",
    )

    # The parsed namespace holds exactly image_path, merge_level,
    # no_visualize and model_dir, which are main()'s parameter names.
    options = cli.parse_args()
    main(**vars(options))