import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
# Hub identifiers: the base chat model and the LoRA adapter applied on top of it.
base_model_name = "Qwen/Qwen1.5-1.8B-Chat"
adapter_name = "faltooz123/qwen1.5-sql-qlora-spider"

# Load the base weights in 4-bit NF4 form, with float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# The tokenizer comes from the base model repository, not from the adapter.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    trust_remote_code=True,
)

# The empty-string key in device_map maps the whole model to device index 0.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map={"": 0},
    trust_remote_code=True,
)

# Attach the adapter weights to the quantized base model, then switch to
# evaluation mode for inference.
model = PeftModel.from_pretrained(base_model, adapter_name)
model.eval()
def generate_sql(question: str, db_id: str, max_new_tokens: int = 128) -> str:
    """Generate a SQL query for a natural-language question.

    Builds a chat-formatted prompt (``<|im_start|>`` / ``<|im_end|>`` markers)
    containing the database identifier and the question, runs the module-level
    ``model`` on it with sampling disabled, and returns only the text produced
    after the prompt.

    Args:
        question: The natural-language question to translate to SQL.
        db_id: Identifier of the target database; it is inserted verbatim
            into the prompt.
        max_new_tokens: Upper bound on the number of tokens to generate.
            Defaults to 128.

    Returns:
        The decoded completion with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = (
        "<|im_start|>system\n"
        "You are an expert SQL assistant.<|im_end|>\n"
        "<|im_start|>user\n"
        f"Database: {db_id}\n"
        f"Question: {question}\n"
        "Write only the SQL query.<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    # Move the inputs to the device the model actually lives on instead of
    # assuming a fixed "cuda" target.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )

    # The output sequence starts with the prompt tokens; keep only what
    # follows them.
    generated = outputs[0][prompt_length:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()
# Example run: ask one question against the "concert_singer" database and
# print the generated query.
sql = generate_sql(
    question="How many singers do we have?",
    db_id="concert_singer",
)
print(sql)