query-context-pruner-multilingual-Qwen3-4B
26
license:mit
by
hotchpotch
Other
OTHER
4B params
New
26 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
9GB+ RAM
Mobile
Laptop
Server
Quick Summary
Multilingual query–context pruning model (based on Qwen3-4B) that selects which context chunks are relevant to a given query.
Device Compatibility
Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
4GB+ RAM
Code Examples
🚀 Quick Start (Python · transformers)
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Choose model size
MODEL_NAME = "hotchpotch/query-context-pruner-multilingual-Qwen3-4B" # Recommended (best balance)
# MODEL_NAME = "hotchpotch/query-context-pruner-multilingual-Qwen3-1.7B" # Faster alternative

# Load weights in bfloat16; device_map="auto" lets accelerate place the
# layers on whatever devices are available (GPU if present, else CPU).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
# Tokenizer must come from the same checkpoint so the chat template matches.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def create_user_prompt(query: str, contexts: list[str]) -> str:
    """Build the model's user prompt: the query, a `---` separator, then the
    context chunks labelled with 1-based indices (`[1] ...`, `[2] ...`)."""
    numbered = (f"[{idx}] {chunk}" for idx, chunk in enumerate(contexts, start=1))
    return "\n".join((query, "---", "\n".join(numbered)))
# Example: Python documentation split into chunks
query = "How do you read and write files in Python?"
contexts = [
    "Python is a high-level programming language known for its simplicity and readability.",
    "To read a file in Python, you use the open() function with the 'r' mode. For example: with open('file.txt', 'r') as f: content = f.read()",
    "Writing to files in Python also uses open() with 'w' mode for writing or 'a' mode for appending. Example: with open('file.txt', 'w') as f: f.write('Hello')",
    "Python supports multiple programming paradigms including object-oriented and functional programming.",
    "The 'with' statement ensures proper file handling by automatically closing files after use, preventing resource leaks.",
    "Python has various built-in functions like len(), range(), and type() that are commonly used in everyday programming."
]

# Generate response: format the query + chunks, apply the model's chat
# template, then generate with low temperature so the chunk-index output
# is near-deterministic.
prompt = create_user_prompt(query, contexts)
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128, temperature=0.1, do_sample=True)
# Slice off the prompt tokens so only the newly generated answer is decoded.
response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
print(f"Relevant chunks: {response}") # Expected: "2,3,5" (reading, writing, and proper file handling)
# Note: The model's output indices start from 1, not 0

🚀 Quick Start (Python · transformers)
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Choose model size
MODEL_NAME = "hotchpotch/query-context-pruner-multilingual-Qwen3-4B" # Recommended (best balance)
# MODEL_NAME = "hotchpotch/query-context-pruner-multilingual-Qwen3-1.7B" # Faster alternative

# Load weights in bfloat16; device_map="auto" lets accelerate place the
# layers on whatever devices are available (GPU if present, else CPU).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
# Tokenizer must come from the same checkpoint so the chat template matches.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def create_user_prompt(query: str, contexts: list[str]) -> str:
    """Build the model's user prompt: the query, a `---` separator, then the
    context chunks labelled with 1-based indices (`[1] ...`, `[2] ...`)."""
    numbered = (f"[{idx}] {chunk}" for idx, chunk in enumerate(contexts, start=1))
    return "\n".join((query, "---", "\n".join(numbered)))
# Example: Python documentation split into chunks
query = "How do you read and write files in Python?"
contexts = [
    "Python is a high-level programming language known for its simplicity and readability.",
    "To read a file in Python, you use the open() function with the 'r' mode. For example: with open('file.txt', 'r') as f: content = f.read()",
    "Writing to files in Python also uses open() with 'w' mode for writing or 'a' mode for appending. Example: with open('file.txt', 'w') as f: f.write('Hello')",
    "Python supports multiple programming paradigms including object-oriented and functional programming.",
    "The 'with' statement ensures proper file handling by automatically closing files after use, preventing resource leaks.",
    "Python has various built-in functions like len(), range(), and type() that are commonly used in everyday programming."
]

# Generate response: format the query + chunks, apply the model's chat
# template, then generate with low temperature so the chunk-index output
# is near-deterministic.
prompt = create_user_prompt(query, contexts)
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128, temperature=0.1, do_sample=True)
# Slice off the prompt tokens so only the newly generated answer is decoded.
response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
print(f"Relevant chunks: {response}") # Expected: "2,3,5" (reading, writing, and proper file handling)
# Note: The model's output indices start from 1, not 0

Deploy This Model
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API

Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.