samsung_innovation_campus_radiology_model

42
1 language
mllama
by
Cosmobillian
Image Model
OTHER
New
42 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary

AI model with specialized capabilities.

Code Examples

👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
👨‍💻 How to Use (Inference)bashpytorch
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    import torch; v = re.match(r"[0-9\\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Run inference with the fine-tuned radiology vision model.

Loads Cosmobillian/radiologist_llama in 16-bit, formats a single-image
chat prompt, and streams the generated report to stdout.
"""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and processor in 16-bit (float16).
# On low-VRAM GPUs, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's model into inference-optimized mode.
FastVisionModel.for_inference(model)

# Load the X-ray; fall back to a blank placeholder so the demo still runs.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn containing the image plus the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Fix: follow the model's actual device instead of hard-coding "cuda",
# so the script also works on CPU-only or otherwise non-CUDA setups.
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)

# TextStreamer prints tokens as they are generated (prompt suppressed).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is streamed, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Pick the best available device instead of assuming CUDA is present;
# hard-coding "cuda" crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into Unsloth's inference mode (enables fast generation paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the demo still runs end-to-end.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the multimodal chat template
# (one image placeholder followed by the text instruction).
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # Special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Use TextStreamer for real-time output; skip_prompt hides the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output token-by-token.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256  # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
# =====================================================================
# 2. Run Inference with Python (transformers + unsloth)
#
# NOTE(review): the original page repeated this exact snippet ~20 times
# (a scraping artifact); it is collapsed to a single canonical copy.
# =====================================================================
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its tokenizer/processor in 16-bit (float16).
# If you have less VRAM, set load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (replace the placeholder with the path to your own X-ray).
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Fall back to a blank placeholder image so the rest of the demo still runs.
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# A single user turn: an image placeholder followed by the text instruction,
# matching the multimodal chat template this model expects.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
# Robustness fix: the original hard-coded .to("cuda") and crashed on CPU-only
# hosts; fall back to CPU when no CUDA device is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens are already present in the template
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time, skipping the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run generation; output is printed by the streamer, so the return value is unused.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of new tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and its processor ("tokenizer") in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True instead.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# X-ray files are frequently single-channel ("L" mode); normalize to RGB
# so the vision processor always receives 3-channel input, matching the
# RGB placeholder used in the fallback branch below.
try:
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder so the rest of the demo runs
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template, then encode image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to(model.device)  # follow the model's placement instead of hard-coding "cuda"

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256, # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16)
# If you have less VRAM, you can use load_in_4bit=True
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Prepare the model for inference
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image)
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Format the messages according to the chat template
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Prepare the inputs with the tokenizer
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Pythonpythontransformers
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
"""Example script: generate a radiology report for an X-ray image with the
fine-tuned Cosmobillian/radiologist_llama vision model via Unsloth."""
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Path to the X-ray image to analyse -- replace before running.
IMAGE_PATH = "path/to/your/xray.jpg"

# Select a device up front: the original hard-coded "cuda", which crashes
# on CPU-only machines. float16 on CPU is slow but functional.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # False is ideal since the model was saved in 16-bit
)

# Switch Unsloth's kernels/adapters into inference mode.
FastVisionModel.for_inference(model)

# Load the image; fall back to a blank placeholder so the example still
# runs end-to-end without a real file.
try:
    image = Image.open(IMAGE_PATH)
except FileNotFoundError:
    print(f"Please provide a valid file path instead of '{IMAGE_PATH}'.")
    image = Image.new("RGB", (512, 512), "black")

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# One user turn: image placeholder plus the text instruction, in the
# multimodal chat-template format.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}
]

# Render the chat template to a prompt string, then tokenize text + image.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # special tokens already come from the template
    return_tensors="pt",
).to(device)

# Stream tokens to stdout as they are generated; skip echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Output is streamed by TextStreamer, so the return value is discarded.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (python · transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# --- Model ---------------------------------------------------------------
# The weights were saved in 16-bit, so float16 without 4-bit quantisation
# loads them faithfully; flip load_in_4bit=True on low-VRAM GPUs.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,
)
FastVisionModel.for_inference(model)  # switch Unsloth into inference mode

# --- Input image -----------------------------------------------------------
# Point this at your own X-ray; a blank canvas keeps the demo runnable
# when no file is supplied.
xray_path = "path/to/your/xray.jpg"
try:
    image = Image.open(xray_path)
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    image = Image.new('RGB', (512, 512), 'black')

# --- Prompt ----------------------------------------------------------------
# Exact instruction wording the model saw during fine-tuning.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": instruction},
        ],
    }
]

prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    prompt,
    add_special_tokens=False,  # the chat template already inserted them
    return_tensors="pt",
).to("cuda")

# --- Generation --------------------------------------------------------------
streamer = TextStreamer(tokenizer, skip_prompt=True)  # stream tokens as produced
print("Model is generating the report...\n---")
_ = model.generate(**inputs, streamer=streamer, max_new_tokens=256)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the fine-tuned vision model and its tokenizer/processor in float16.
# Set load_in_4bit=True instead if GPU VRAM is limited.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False,  # model was saved in 16-bit, so 4-bit is optional
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Pick the device explicitly instead of assuming CUDA is present;
# a hard-coded .to("cuda") crashes on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the input image; fall back to a black placeholder if the path is wrong.
try:
    # .convert("RGB") normalizes grayscale ("L"-mode) X-rays to the
    # 3-channel input the vision processor presumably expects — confirm
    # against the processor's accepted modes.
    image = Image.open("path/to/your/xray.jpg").convert("RGB")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')

# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Chat-template message: one image slot followed by the text instruction.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the prompt from the chat template, then tokenize image + text together.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False,  # the chat template already adds them
    return_tensors="pt",
).to(device)

# Stream generated tokens to stdout in real time (the prompt is skipped).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Generate the report; output is emitted through the streamer.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256,  # maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)
2. Run Inference with Python (transformers)
# End-to-end inference example: load the fine-tuned vision model, feed it
# one X-ray image plus a text instruction, and stream the generated report.
from unsloth import FastVisionModel
from transformers import TextStreamer
from PIL import Image
import torch

# Load the model and tokenizer in 16-bit (float16).
# If you have less VRAM, you can use load_in_4bit=True.
# NOTE(review): "tokenizer" here is presumably Unsloth's multimodal
# processor for vision models — it is called with (image, text) below.
model, tokenizer = FastVisionModel.from_pretrained(
    "Cosmobillian/radiologist_llama",
    dtype=torch.float16,
    load_in_4bit=False, # False is ideal since the model was saved in 16-bit
)

# Switch the model into inference mode (disables training-only code paths).
FastVisionModel.for_inference(model)

# Load your image (specify the path to your own X-ray image).
# On a missing file we warn and fall back to a black 512x512 placeholder
# so the rest of the example still runs end-to-end.
try:
    image = Image.open("path/to/your/xray.jpg")
except FileNotFoundError:
    print("Please provide a valid file path instead of 'path/to/your/xray.jpg'.")
    # Creating a blank image as a placeholder
    image = Image.new('RGB', (512, 512), 'black')


# The instruction format the model was trained on.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

# Single-turn chat message: one image slot plus the text instruction,
# matching the multimodal chat template.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# Render the chat template (generation prompt appended), then tokenize
# image and text together and move the tensors to the GPU.
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
inputs = tokenizer(
    image,
    input_text,
    add_special_tokens=False, # Already present in the template
    return_tensors="pt",
).to("cuda")

# Use TextStreamer for real-time output; skip_prompt=True suppresses the
# echoed prompt tokens so only the generated report is printed.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

print("Model is generating the report...\n---")

# Run the model and stream the output. The return value is discarded
# because the streamer already prints tokens as they are generated.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=256 # Maximum number of tokens to generate
)

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.