from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch
# Hub repositories: the base checkpoint, and the PEFT adapter applied on top of it.
BASE = "mistralai/Mistral-7B-Instruct-v0.2"
ADAPTER = "anksriv/mistral-7b-medical-medqa-qlora"

# 4-bit NF4 weights with double quantisation; compute runs in float16.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# The tokenizer (and therefore the chat template) comes from the base
# checkpoint, not from the adapter repository.
tokenizer = AutoTokenizer.from_pretrained(BASE)

# Load the quantised base model, then wrap it with the adapter weights.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        BASE,
        quantization_config=bnb,
        device_map="auto",
    ),
    ADAPTER,
)
# Instructions that pin down the task and the required answer format.
system = (
    "You are a knowledgeable medical AI assistant. "
    "When given a clinical multiple-choice question, analyze the case carefully, "
    "identify the correct answer (A, B, C, or D), and provide a clear explanation. "
    "Always begin your response with 'The correct answer is X)' where X is the letter."
)

# Placeholder question: replace the elided parts with the real vignette and
# its four answer options.
question = "A 32-year-old woman presents with ... A) ... B) ... C) ... D) ..."

# The Mistral instruct format has no dedicated system turn, and chat-template
# revisions that only accept alternating user/assistant roles raise on a
# "system" message. Prepending the instructions to the user turn works with
# every revision of the base tokenizer's template.
messages = [
    {"role": "user", "content": f"{system}\n\n{question}"},
]
# Render the conversation with the tokenizer's chat template. Asking for a
# dict explicitly yields both the token ids and the attention mask, and does
# not depend on what apply_chat_template returns by default.
encoded = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)
inputs = encoded["input_ids"]

# Pass the attention mask and a pad token id explicitly so generate() does not
# have to guess them (it warns when either one is missing).
out = model.generate(
    input_ids=inputs,
    attention_mask=encoded["attention_mask"],
    max_new_tokens=200,
    do_sample=True,
    temperature=0.1,
    pad_token_id=tokenizer.eos_token_id,
)

# The returned sequence starts with the prompt; decode only the new tokens.
print(tokenizer.decode(out[0][inputs.shape[1]:], skip_special_tokens=True))