query-context-pruner-multilingual-Qwen3-4B

26
license:mit
by
hotchpotch
Other
OTHER
4B params
New
26 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
9GB+ RAM
Mobile
Laptop
Server
Quick Summary

A multilingual query-context pruning model: given a query and a numbered list of context chunks, it returns the 1-indexed numbers of the chunks that are relevant to the query (useful for trimming RAG context before generation).

Device Compatibility

Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
4GB+ RAM

Code Examples

🚀 Quick Start — Python (transformers)
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Choose model size
MODEL_NAME = "hotchpotch/query-context-pruner-multilingual-Qwen3-4B"   # Recommended (best balance)
# MODEL_NAME = "hotchpotch/query-context-pruner-multilingual-Qwen3-1.7B"  # Faster alternative

# Download/load the weights in bfloat16; device_map="auto" lets the
# accelerate backend place layers on the best available device
# (GPU if present, otherwise CPU).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
# Tokenizer must come from the same checkpoint so the chat template matches.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def create_user_prompt(query: str, contexts: list[str]) -> str:
    """Format the model input: the query, a `---` separator, then 1-indexed chunks.

    Each chunk is rendered as ``[n] <text>`` on its own line, matching the
    1-based numbering the model uses in its answer.
    """
    numbered_lines = []
    for chunk_no, chunk_text in enumerate(contexts, start=1):
        numbered_lines.append(f"[{chunk_no}] {chunk_text}")
    return f"{query}\n---\n" + "\n".join(numbered_lines)


# Example: Python documentation split into chunks
query = "How do you read and write files in Python?"
contexts = [
    "Python is a high-level programming language known for its simplicity and readability.",
    "To read a file in Python, you use the open() function with the 'r' mode. For example: with open('file.txt', 'r') as f: content = f.read()",
    "Writing to files in Python also uses open() with 'w' mode for writing or 'a' mode for appending. Example: with open('file.txt', 'w') as f: f.write('Hello')",
    "Python supports multiple programming paradigms including object-oriented and functional programming.",
    "The 'with' statement ensures proper file handling by automatically closing files after use, preventing resource leaks.",
    "Python has various built-in functions like len(), range(), and type() that are commonly used in everyday programming."
]

# Generate response
# Build the "[1] ... [2] ..." numbered prompt and wrap it with the model's
# chat template (add_generation_prompt=True appends the assistant turn marker).
prompt = create_user_prompt(query, contexts)
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# Low temperature keeps the sampled output nearly deterministic.
outputs = model.generate(**inputs, max_new_tokens=128, temperature=0.1, do_sample=True)
# Slice off the prompt tokens so only the newly generated text is decoded.
response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

print(f"Relevant chunks: {response}")  # Expected: "2,3,5" (reading, writing, and proper file handling)

# Note: The model's output indices start from 1, not 0
🚀 Quick Start — Python (transformers)
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Choose model size
MODEL_NAME = "hotchpotch/query-context-pruner-multilingual-Qwen3-4B"   # Recommended (best balance)
# MODEL_NAME = "hotchpotch/query-context-pruner-multilingual-Qwen3-1.7B"  # Faster alternative

# Download/load the weights in bfloat16; device_map="auto" lets the
# accelerate backend place layers on the best available device
# (GPU if present, otherwise CPU).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
# Tokenizer must come from the same checkpoint so the chat template matches.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def create_user_prompt(query: str, contexts: list[str]) -> str:
    """Format the model input: the query, a `---` separator, then 1-indexed chunks.

    Each chunk is rendered as ``[n] <text>`` on its own line, matching the
    1-based numbering the model uses in its answer.
    """
    numbered_lines = []
    for chunk_no, chunk_text in enumerate(contexts, start=1):
        numbered_lines.append(f"[{chunk_no}] {chunk_text}")
    return f"{query}\n---\n" + "\n".join(numbered_lines)


# Example: Python documentation split into chunks
query = "How do you read and write files in Python?"
contexts = [
    "Python is a high-level programming language known for its simplicity and readability.",
    "To read a file in Python, you use the open() function with the 'r' mode. For example: with open('file.txt', 'r') as f: content = f.read()",
    "Writing to files in Python also uses open() with 'w' mode for writing or 'a' mode for appending. Example: with open('file.txt', 'w') as f: f.write('Hello')",
    "Python supports multiple programming paradigms including object-oriented and functional programming.",
    "The 'with' statement ensures proper file handling by automatically closing files after use, preventing resource leaks.",
    "Python has various built-in functions like len(), range(), and type() that are commonly used in everyday programming."
]

# Generate response
# Build the "[1] ... [2] ..." numbered prompt and wrap it with the model's
# chat template (add_generation_prompt=True appends the assistant turn marker).
prompt = create_user_prompt(query, contexts)
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# Low temperature keeps the sampled output nearly deterministic.
outputs = model.generate(**inputs, max_new_tokens=128, temperature=0.1, do_sample=True)
# Slice off the prompt tokens so only the newly generated text is decoded.
response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

print(f"Relevant chunks: {response}")  # Expected: "2,3,5" (reading, writing, and proper file handling)

# Note: The model's output indices start from 1, not 0

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.