uzbek-trocr-line-v1

210
license:mit
by
abduazizovanozima7
Image Model
OTHER
New
210 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary

A TrOCR-based vision encoder–decoder model for optical character recognition (OCR) of single text lines in Uzbek.

Code Examples

🚀 Quick Start (python, transformers)
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch

# Load the image processor and the fine-tuned weights.
# NOTE(review): the processor is loaded from the base
# "microsoft/trocr-base-handwritten" checkpoint while the weights come from the
# fine-tuned repo — confirm both share the same tokenizer/vocabulary.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("abduazizovanozima7/uzbek-trocr-line-v1")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Read the input image (presumably a single text line) and force 3-channel RGB.
image = Image.open("line_image.png").convert("RGB")

# OCR: preprocess the image into a tensor on the model's device, generate token
# ids with 4-beam search capped at 128 new tokens, then decode the first (only)
# sequence back into text without special tokens.
pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)
generated_ids = model.generate(pixel_values, max_new_tokens=128, num_beams=4)
text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

print(text)
Load model (python, transformers)
import cv2
import numpy as np
import torch
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Load the image processor and the fine-tuned weights once at import time.
# `processor`, `model` and `device` are module-level globals that `ocr_batch`
# reads directly.
# NOTE(review): the processor comes from the base checkpoint, not the
# fine-tuned repo — confirm both share the same tokenizer/vocabulary.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("abduazizovanozima7/uzbek-trocr-line-v1")
# Use the GPU when CUDA is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
# Switch the model to evaluation mode before inference.
model.eval()

def segment_lines(image_path, *, ink_ratio=0.02, min_height=15, padding=10):
    """Split a full page image into individual text lines.

    The page is binarized, the number of ink pixels per row is counted
    (horizontal projection profile), and every vertical run of "inked" rows
    that is tall enough is cropped out as one line.

    Args:
        image_path: Path of the page image, passed to ``cv2.imread``.
        ink_ratio: A row counts as text when its ink-pixel count exceeds
            ``ink_ratio`` times the image width.
        min_height: Minimum height in rows for a run to be kept as a line;
            shorter runs are discarded.
        padding: Rows added above and below each kept run, clamped to the
            image bounds.

    Returns:
        A list of full-width BGR crops (NumPy slices of the loaded image),
        ordered top to bottom. Empty when no run qualifies.

    Raises:
        ValueError: If the image cannot be read (missing file, unsupported or
            corrupt format).
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of inside cvtColor.
        raise ValueError(f"Could not read image: {image_path!r}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Binarize with ink as 255 (inverted) so that summing a row counts ink.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 15, 10,
    )

    # Horizontal projection profile: ink pixels per row.
    h_proj = np.sum(binary, axis=1) / 255
    h, w = img.shape[:2]
    is_text = h_proj > w * ink_ratio

    # Locate runs of consecutive text rows. Padding the mask with a zero on
    # both sides makes every run produce exactly one rising (+1) and one
    # falling (-1) edge, including runs touching the top or bottom border.
    edges = np.diff(np.concatenate(([0], is_text.astype(np.int8), [0])))
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1)  # exclusive end row of each run

    # Crop each sufficiently tall run with vertical padding.
    return [
        img[max(0, int(s) - padding):min(h, int(e) + padding), :]
        for s, e in zip(starts, ends)
        if e - s >= min_height
    ]

def ocr_batch(line_images, batch_size=8):
    """Recognize the text of several line crops, ``batch_size`` at a time.

    Args:
        line_images: Sequence of BGR image arrays (as produced by OpenCV).
        batch_size: Number of images sent through the model per forward pass.

    Returns:
        A list with one whitespace-stripped string per input image, in input
        order.
    """
    recognized = []
    total = len(line_images)
    for offset in range(0, total, batch_size):
        # Convert this chunk's OpenCV BGR arrays into RGB PIL images.
        rgb_images = []
        for bgr in line_images[offset:offset + batch_size]:
            rgb_images.append(Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)))

        encoded = processor(rgb_images, return_tensors="pt")
        pixel_values = encoded.pixel_values.to(device)

        # Inference only: no gradients are needed for generation.
        with torch.no_grad():
            token_ids = model.generate(pixel_values, max_new_tokens=128, num_beams=4)

        for decoded in processor.batch_decode(token_ids, skip_special_tokens=True):
            recognized.append(decoded.strip())
    return recognized

def ocr_full_page(image_path):
    """Run the whole pipeline on one page image: segment, recognize, join.

    Args:
        image_path: Path of the page image to process.

    Returns:
        The recognized lines joined with newlines, or an empty string when
        segmentation finds no lines.
    """
    line_crops = segment_lines(image_path)
    if line_crops:
        return "\n".join(ocr_batch(line_crops))
    return ""

# Usage: run the full pipeline on one page image and print the recognized
# text, one output line per detected text line.
result = ocr_full_page("handwritten_page.jpg")
print(result)

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with a simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.