bananafish-0517

Languages: 3
License: apache-2.0
Author: marcuscedricridia
Type: Language Model (other)
Status: New, early-stage
Downloads: 1
Edge AI targets: Mobile, Laptop, Server (compatibility unknown)
Quick Summary

Model Description

bananafish-0517 is a proof-of-concept fine-tuned checkpoint built upon the Qwen 0.

Code Examples

Example Usage (Python, transformers)

The snippet below loads the tokenizer and model, builds a ChatML-style prompt, and streams the response token by token. The repository id is assumed from this model card; adjust it if the checkpoint is hosted under a different name.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import threading

# Repo id assumed from this model card; change it if the checkpoint lives elsewhere.
model_id = "marcuscedricridia/bananafish-0517"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id).to("cuda")

def create_chatml_prompt(user_message):
    # ChatML-style prompt: system and user turns, assistant turn left open for generation.
    return f"""<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
{user_message}<|im_end|>
<|im_start|>assistant
"""

user_input = "Who are you?"
prompt = create_chatml_prompt(user_input)

inputs = tokenizer([prompt], return_tensors="pt", padding=True).to("cuda")

# Stream decoded text as it is generated instead of waiting for the full completion.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_kwargs = dict(
    **inputs,
    max_new_tokens=2048,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    do_sample=True,
    temperature=0.8,
    top_p=0.9,
    streamer=streamer,
)

# Run generation in a background thread so the main thread can consume the stream.
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

for new_text in streamer:
    print(new_text, end="", flush=True)
thread.join()
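
If the tokenizer ships a chat template (as Qwen-family checkpoints typically do), the same prompt can be produced with apply_chat_template instead of a hand-written ChatML string. The following is a minimal sketch under that assumption, reusing the tokenizer, model, and streamer setup from above; the rest of the streaming loop is unchanged.

# Alternative prompt construction, assuming the tokenizer defines a ChatML chat template.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who are you?"},
]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,  # leaves the assistant turn open for generation
)
inputs = tokenizer([prompt], return_tensors="pt", padding=True).to("cuda")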
