uzbek-trocr-line-v1
210
license:mit
by
abduazizovanozima7
Image Model
OTHER
New
210 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
TrOCR-based vision encoder-decoder model fine-tuned to recognize handwritten Uzbek text from single-line images.
Code Examples
🚀 Quick Start (Python, `transformers`)
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch

# Load the processor from the base TrOCR checkpoint and the fine-tuned weights.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("abduazizovanozima7/uzbek-trocr-line-v1")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Read a single text-line image.
line_img = Image.open("line_image.png").convert("RGB")

# Run OCR: preprocess, generate token ids with beam search, then decode to text.
inputs = processor(line_img, return_tensors="pt").pixel_values.to(device)
out_ids = model.generate(inputs, max_new_tokens=128, num_beams=4)
text = processor.batch_decode(out_ids, skip_special_tokens=True)[0]
print(text)

Load model — full-page pipeline (Python, `transformers`)
import cv2
import numpy as np
import torch
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Load model: the processor comes from the base TrOCR checkpoint, while the
# generation weights come from the fine-tuned Uzbek line-recognition model.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("abduazizovanozima7/uzbek-trocr-line-v1")
# Prefer GPU when available; inference only, so put the model in eval mode.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()
def segment_lines(image_path, min_line_height=15, pad=10, density_frac=0.02):
    """Split a full page image into individual text-line crops.

    Uses a horizontal projection profile of a binarized page: rows whose
    ink density exceeds ``density_frac`` of the page width are treated as
    text, and contiguous runs of such rows become line regions.

    Args:
        image_path: Path to the page image (any format cv2.imread accepts).
        min_line_height: Minimum run height (in rows) to keep as a line;
            shorter runs are treated as noise. Default 15.
        pad: Vertical padding (in pixels) added above and below each crop,
            clamped to the image bounds. Default 10.
        density_frac: Fraction of the page width a row's ink count must
            exceed to count as text. Default 0.02.

    Returns:
        List of BGR image arrays, one per detected line, top to bottom.
        Empty list when no lines are found.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # loudly here rather than with a cryptic error inside cvtColor.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Binarize (inverted: ink -> 255) so the projection counts ink pixels.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 15, 10
    )
    # Horizontal projection profile: ink-pixel count per row.
    h_proj = np.sum(binary, axis=1) / 255
    h, w = img.shape[:2]
    threshold = w * density_frac
    # Find contiguous runs of text rows.
    is_text = h_proj > threshold
    lines = []
    in_text = False
    start = 0
    for i in range(len(is_text)):
        if is_text[i] and not in_text:
            start = i
            in_text = True
        elif not is_text[i] and in_text:
            if i - start >= min_line_height:
                lines.append((start, i))
            in_text = False
    # Close a run that extends to the bottom of the page.
    if in_text and len(is_text) - start >= min_line_height:
        lines.append((start, len(is_text)))
    # Crop each line with vertical padding, clamped to the image.
    cropped = []
    for s, e in lines:
        y1 = max(0, s - pad)
        y2 = min(h, e + pad)
        cropped.append(img[y1:y2, :])
    return cropped
def ocr_batch(line_images, batch_size=8):
    """Run OCR on multiple line images in batches.

    Args:
        line_images: List of BGR image arrays (as produced by segment_lines).
        batch_size: Number of lines processed per forward pass. Default 8.

    Returns:
        List of recognized strings (whitespace-stripped), one per input image.
    """
    recognized = []
    total = len(line_images)
    for offset in range(0, total, batch_size):
        chunk = line_images[offset:offset + batch_size]
        # Model expects RGB PIL images; the crops are OpenCV BGR arrays.
        pil_batch = [
            Image.fromarray(cv2.cvtColor(arr, cv2.COLOR_BGR2RGB))
            for arr in chunk
        ]
        inputs = processor(pil_batch, return_tensors="pt").pixel_values.to(device)
        with torch.no_grad():
            out_ids = model.generate(inputs, max_new_tokens=128, num_beams=4)
        decoded = processor.batch_decode(out_ids, skip_special_tokens=True)
        recognized.extend(item.strip() for item in decoded)
    return recognized
def ocr_full_page(image_path):
    """Full pipeline: image → lines → OCR → text.

    Args:
        image_path: Path to a full page image.

    Returns:
        Recognized page text, one line per detected text line; empty string
        when no text lines are found.
    """
    line_crops = segment_lines(image_path)
    if not line_crops:
        return ""
    return "\n".join(ocr_batch(line_crops))
# Usage: run the whole pipeline on a scanned or photographed page image.
result = ocr_full_page("handwritten_page.jpg")
print(result)

Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API

Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.