# Quantized Nanonets-OCR-s Model
## Requirements

```bash
pip install vllm
pip install pdf2image
apt-get install -y poppler-utils
```
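`pdf2image` shells out to Poppler's `pdftoppm`, so it must be on the PATH. A minimal sanity check (the error message is illustrative):

```python
import shutil

# pdf2image relies on Poppler's pdftoppm binary; fail early if it is missing.
if shutil.which("pdftoppm") is None:
    raise RuntimeError("poppler-utils not found; install it with `apt-get install -y poppler-utils`.")
```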
## Initialize the model

Loading the model takes roughly 5 GB of VRAM.
```python
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

model_name = "jester6136/Nanonets-OCR-s-w8a8"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

llm = LLM(
    model=model_name,
    trust_remote_code=True,
    gpu_memory_utilization=0.5,
    max_model_len=10000,
    max_num_seqs=1,
)
```
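Optionally, you can sanity-check the loaded model on a single image before running a full PDF. This is a minimal sketch: `test_page.png` is a hypothetical local image, and the prompt reuses the same chat-template and multi-modal input format as the extraction script below.

```python
from PIL import Image
from vllm import SamplingParams

# Hypothetical test image; replace with any page-sized image you have locally.
test_image = Image.open("test_page.png")

smoke_prompt = (
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\n"
    "<|vision_start|><|image_pad|><|vision_end|>\n"
    "Extract the text from the above document as if you were reading it naturally.<|im_end|>\n"
    "<|im_start|>assistant\n"
)

out = llm.generate(
    [{"prompt": smoke_prompt, "multi_modal_data": {"image": [test_image]}}],
    sampling_params=SamplingParams(temperature=0.0, max_tokens=256),
)
print(out[0].outputs[0].text)
```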
## Extract a PDF as markdown
```python
import io
from typing import List

from PIL import Image
from pdf2image import convert_from_path
from vllm import SamplingParams


def make_prompt(question: str) -> str:
    return (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        "<|im_start|>user\n"
        "<|vision_start|><|image_pad|><|vision_end|>\n"
        f"{question}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )


question_text = (
    "Extract the text from the above document as if you were reading it naturally. Keep fontweight as **."
)

sampling_params = SamplingParams(
    repetition_penalty=1.05,
    temperature=0.0,
    max_tokens=10000,
)

pdf_path = "your_pdf_path.pdf"
images = convert_from_path(pdf_path)


def downscale_image(img: Image.Image, max_dim: int = 768) -> Image.Image:
    width, height = img.size
    if max(width, height) <= max_dim:
        return img
    if width > height:
        new_width = max_dim
        new_height = int((max_dim / width) * height)
    else:
        new_height = max_dim
        new_width = int((max_dim / height) * width)
    return img.resize((new_width, new_height), Image.LANCZOS)


# Prepare batched inputs
batched_inputs = []
for page_num, image in enumerate(images):
    print(f"Preparing page {page_num + 1}...")
    image = downscale_image(image)
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format="PNG")
    pil_image = Image.open(io.BytesIO(img_byte_arr.getvalue()))

    prompt = make_prompt(question_text)
    batched_inputs.append({
        "prompt": prompt,
        "multi_modal_data": {
            "image": [pil_image]
        }
    })

# Run batched inference
print("Running batch inference...")
batched_outputs = llm.generate(batched_inputs, sampling_params=sampling_params)

# Collect results
all_outputs = []
for page_num, output in enumerate(batched_outputs):
    extracted_text = output.outputs[0].text
    all_outputs.append(f"<page_number>{page_num + 1}</page_number>\n{extracted_text}\n{'-' * 50}")

# Save to file
output_file = "/content/extracted_text.txt"
with open(output_file, "w", encoding="utf-8") as f:
    f.write("\n".join(all_outputs))

print(f"Extracted text saved to: {output_file}")
```
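The `<page_number>` markers and dashed separators make it easy to split the dump back into per-page files. A minimal sketch, assuming the output path and separator format from the script above; the regex and the `/content/pages` output directory are illustrative choices:

```python
import re
from pathlib import Path

# Hypothetical output directory for per-page markdown files.
out_dir = Path("/content/pages")
out_dir.mkdir(parents=True, exist_ok=True)

text = Path("/content/extracted_text.txt").read_text(encoding="utf-8")

# Each page was written as "<page_number>N</page_number>\n{text}\n" followed by 50 dashes.
for match in re.finditer(r"<page_number>(\d+)</page_number>\n(.*?)(?:\n-{50}|\Z)", text, flags=re.S):
    page_num, page_text = match.group(1), match.group(2)
    (out_dir / f"page_{int(page_num):03d}.md").write_text(page_text.strip(), encoding="utf-8")
```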