Plano-Orchestrator-4B-FP8

7
1
by
katanemo
Language Model
OTHER
4B params
New
7 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
9GB+ RAM
Mobile
Laptop
Server
Quick Summary

A 4B-parameter language model specialized for agent orchestration: given a conversation and a list of available routes (agents), it returns the route names that can fulfill the user's latest intent, as JSON.

Device Compatibility

Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum Recommended
4GB+ RAM

Code Examples

Example (Python, transformers)
import json
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM


# Prompt template for the route-selection task. It is filled in by
# build_messages() via str.format with two placeholders:
#   {routes}       - newline-separated JSON route specs (see convert_agents_to_routes)
#   {conversation} - the chat history serialized as pretty-printed JSON
# NOTE: the literal {{...}} braces in the "Response Format" line are escaped
# so str.format emits a single-brace JSON example, not a placeholder.
ORCHESTRATION_PROMPT = (
    "You are a helpful assistant that selects the most suitable routes based on user intent.\n"
    "You are provided with a list of available routes enclosed within <routes></routes> XML tags:\n"
    "<routes>\n{routes}\n</routes>\n\n"
    "You are also given the conversation context enclosed within <conversation></conversation> XML tags:\n"
    "<conversation>\n{conversation}\n</conversation>\n\n"
    "## Instructions\n"
    "1. Analyze the latest user intent from the conversation.\n"
    "2. Compare it against the available routes to find which routes can help fulfill the request.\n"
    "3. Respond only with the exact route names from <routes>.\n"
    "4. If no routes can help or the intent is already fulfilled, return an empty list.\n\n"
    "## Response Format\n"
    "Return your answer strictly in JSON as follows:\n"
    '{{"route": ["route_name_1", "route_name_2", "..."]}}\n'
    "If no routes are needed, return an empty list for `route`."
)

def convert_agents_to_routes(agents):
    """Serialize agent specs into newline-separated JSON route entries.

    Each agent dict must provide "name" and "description"; any other keys
    are dropped from the emitted route spec.
    """
    serialized = []
    for agent in agents:
        route = {
            "name": agent["name"],
            "description": agent["description"],
        }
        serialized.append(json.dumps(route, ensure_ascii=False))
    return "\n".join(serialized)

def build_messages(available_agents, conversation):
    """Return a one-element chat message list carrying the filled-in
    orchestration prompt.

    The available agents are rendered as route specs and the conversation
    history as pretty-printed JSON, then both are substituted into
    ORCHESTRATION_PROMPT.
    """
    filled_prompt = ORCHESTRATION_PROMPT.format(
        routes=convert_agents_to_routes(available_agents),
        conversation=json.dumps(conversation, indent=4, ensure_ascii=False),
    )
    return [{"role": "user", "content": filled_prompt}]

# Load model and tokenizer (fp16 weights, automatic device placement).
model_name = "katanemo/Plano-Orchestrator-4B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Define available agents (the candidate routes the model selects among).
available_agents = [
    {"name": "WeatherAgent", "description": "Provides weather forecasts and current conditions for any location"},
    {"name": "CodeAgent", "description": "Generates, debugs, explains, and reviews code in multiple programming languages"}
]

# Conversation history
conversation = [
    {"role": "user", "content": "What's the weather like today?"},
    {"role": "assistant", "content": "I can help you with that. Could you tell me your location?"},
    {"role": "user", "content": "San Francisco"},
]

# Build messages and generate.
# FIX: the original snippet referenced an undefined `messages`; it must be
# produced by build_messages() from the agents and conversation above.
messages = build_messages(available_agents, conversation)
model_inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt", return_dict=True
).to(model.device)

generated_ids = model.generate(**model_inputs, max_new_tokens=32768)
# Strip the prompt tokens from each sequence so only the completion remains.
generated_ids = [
    output_ids[len(input_ids) :]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
# Output: {"route": ["WeatherAgent"]}

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.