Qwen3-1.7B-Flux-Prompt

43
1
1.7B
license:apache-2.0
by
aifeifei798
Language Model
OTHER
1.7B params
New
43 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
4GB+ RAM
Mobile
Laptop
Server
Quick Summary

A 1.7B-parameter language model fine-tuned to expand short user ideas into detailed Flux image-generation prompts.

Device Compatibility

Mobile
4-6GB RAM
Laptop
16GB RAM
Server
GPU
Minimum recommended:
2GB+ RAM

Code Examples

💻 How to Use (Python, transformers)
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face repo id of the fine-tuned Flux-prompt model.
model_name = "aifeifei798/Qwen3-1.7B-Flux-Prompt" # Replace with your actual repo name

# Fetch the tokenizer first, then the weights. torch_dtype="auto" keeps the
# checkpoint's native precision; device_map="auto" places the model on
# whatever accelerator (or CPU) is available.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype="auto", device_map="auto"
)

def generate_flux_prompt(prompt, *, max_new_tokens=512, temperature=0.7,
                         top_p=0.9, top_k=50, repetition_penalty=1.15,
                         no_repeat_ngram_size=3):
    """Expand a short user idea into a detailed Flux image-generation prompt.

    Args:
        prompt: Free-form user request (e.g. "a cat on a windowsill").
        max_new_tokens: Generation budget; 512 tokens are sufficient for a
            detailed visual description.
        temperature: Sampling temperature; 0.7 balances imaginative detail
            against coherence.
        top_p: Nucleus-sampling threshold; filters out very unlikely words.
        top_k: Limits the candidate vocabulary to the top-k tokens for
            stability.
        repetition_penalty: Slight penalty to reduce repetitive phrases
            without breaking grammar.
        no_repeat_ngram_size: Blocks verbatim repetition of n-grams of this
            size.

    Returns:
        The generated prompt text, with any reasoning trace preceding the
        final ``</think>`` token stripped.
    """
    messages = [{"role": "user", "content": prompt}]

    # Render the chat template; add_generation_prompt appends the assistant
    # turn marker so the model starts answering immediately.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        # enable_thinking=False
    )

    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Sampling is enabled (do_sample=True) so repeated calls can produce
    # creative variations; all knobs are caller-tunable via keyword args.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
    )

    # Keep only the newly generated tokens (drop the echoed input prompt).
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()

    # If a reasoning-capable base model emitted a trace, decode only what
    # follows the last </think> token (id 151668 in the Qwen architecture —
    # NOTE(review): confirm this id if the base model changes).
    try:
        index = len(output_ids) - output_ids[::-1].index(151668)
    except ValueError:
        index = 0  # No thinking trace found; output starts from the beginning.

    return tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip()

# Example Usage
# if __name__ == "__main__":
    # user_input = "a sexy model at new york"
    # print(f"Input: {user_input}\n")
    # print(f"Generated Flux Prompt:\n{generate_flux_prompt(user_input)}")

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.