#!/Users/sij/miniforge3/envs/minicpm/bin/python
import argparse
import os

# Enable CPU fallback for operators the MPS backend does not implement.
# PyTorch reads PYTORCH_ENABLE_MPS_FALLBACK when the torch library is first
# loaded, so it must be set BEFORE `import torch` (and before any package that
# imports torch, such as transformers); setting it afterwards has no effect.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

import torch  # noqa: E402  (deliberately imported after the env var is set)
from PIL import Image  # noqa: E402
from transformers import AutoModel, AutoTokenizer  # noqa: E402

# Command-line interface: one required positional argument, the image path.
parser = argparse.ArgumentParser(description='Process image for model.')
parser.add_argument(
    'image_path',
    type=str,
    help='Path to the image file',
)
args = parser.parse_args()

# Use the MPS backend when PyTorch reports it as available, otherwise the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Load the model and its tokenizer from the same checkpoint.
# NOTE(review): trust_remote_code=True runs Python code shipped in the model
# repository; consider pinning a `revision` — confirm with the repo owner.
model_id = 'openbmb/MiniCPM-Llama3-V-2_5'

# float32 when running on the CPU, float16 on any other device.
if device == torch.device("cpu"):
    model_dtype = torch.float32
else:
    model_dtype = torch.float16

model = AutoModel.from_pretrained(model_id, trust_remote_code=True, torch_dtype=model_dtype)
model = model.to(device)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Load and prepare the image
# Use the path given on the command line. A hard-coded absolute path used to
# override it, which made the required `image_path` argument a no-op.
image_path = args.image_path
# Open in a context manager so the file handle is closed promptly; convert()
# returns a separate RGB image that remains usable after the file is closed.
with Image.open(image_path) as source_image:
    image = source_image.convert('RGB')
# Single-turn user prompt sent to the model together with the image.
question = 'Using modern English and adult slang words like cock, cum, pussy, fuck, etc., describe the scene pictured in the image.'
msgs = [{'role': 'user', 'content': question}]

# Run one chat turn with gradient tracking disabled, then print the reply.
chat_kwargs = {
    'image': image,
    'msgs': msgs,
    'tokenizer': tokenizer,
    'sampling': True,
    'temperature': 0.7,
}
with torch.no_grad():
    res = model.chat(**chat_kwargs)

print(res)