|
|
|
|
|
|
|
|
|
|
|
|
|
|
import argparse |
|
|
|
|
|
from nemo_retriever_ocr.inference.pipeline import NemoRetrieverOCR |
|
|
|
|
|
|
|
|
def main(image_path, merge_level, no_visualize, model_dir):
    """Run the OCR pipeline on one image and print how many results it produced.

    Args:
        image_path: Input image; forwarded unchanged as the first argument of
            the pipeline call.
        merge_level: Forwarded as the pipeline's ``merge_level`` keyword.
        no_visualize: When true, the pipeline is invoked with
            ``visualize=False``; otherwise with ``visualize=True``.
        model_dir: Location of the model checkpoints, handed to the
            ``NemoRetrieverOCR`` constructor.
    """
    # Forward model_dir so that a caller-supplied checkpoint location actually
    # takes effect instead of being accepted and then silently dropped.
    # NOTE(review): assumes the constructor accepts a ``model_dir`` keyword --
    # confirm against nemo_retriever_ocr.inference.pipeline.
    ocr_pipeline = NemoRetrieverOCR(model_dir=model_dir)

    # The CLI exposes a negative flag, while the pipeline takes a positive one.
    predictions = ocr_pipeline(image_path, merge_level=merge_level, visualize=not no_visualize)

    # Only the number of returned items is reported here.
    print(f"Found {len(predictions)} text regions.")
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: describe the command-line interface, parse it, and
    # hand the parsed values to main().
    cli = argparse.ArgumentParser(description="Run OCR inference and annotate image.")

    # Required positional argument: the image to process.
    cli.add_argument("image_path", help="Path to the input image.", type=str)

    # Optional merge granularity, restricted to the three listed values.
    cli.add_argument(
        "--merge-level",
        default="paragraph",
        choices=["word", "sentence", "paragraph"],
        help="Merge level for OCR output (word, sentence, paragraph).",
        type=str,
    )

    # Boolean switch; False unless the flag is present on the command line.
    cli.add_argument("--no-visualize", help="Do not save the annotated image.", action="store_true")

    # Optional checkpoint location, defaulting to a directory relative to the
    # current working directory.
    cli.add_argument(
        "--model-dir",
        default="./checkpoints",
        help="Path to the model checkpoints.",
        type=str,
    )

    options = cli.parse_args()

    main(
        options.image_path,
        model_dir=options.model_dir,
        no_visualize=options.no_visualize,
        merge_level=options.merge_level,
    )
|
|
|