import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Identifier handed to both from_pretrained() calls below.
model_id = "Willie999/trapSTAR-gemma4"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_id)

print("Loading model directly to CUDA memory map...")
# Pick bfloat16 when the CUDA device reports support for it, else float16.
if torch.cuda.is_bf16_supported():
    weight_dtype = torch.bfloat16
else:
    weight_dtype = torch.float16

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda:0",
    torch_dtype=weight_dtype,
)
# Switch the module to evaluation mode; this script only runs generation.
model.eval()
# System instruction that sets the persona and the expected output.
_SYSTEM_PROMPT = (
    "You are Trap Star, an autonomous defensive security auditing agent. "
    "Analyze the provided code snippet, identify the vulnerability type, "
    "and write out structural recommendations."
)

# User turn: a request followed by a fenced C++ snippet. The lines are joined
# with "\n" so the resulting text does not depend on this file's indentation.
_USER_PROMPT = "\n".join(
    [
        "Review this function block for potential vulnerabilities:",
        "```cpp",
        "void process_str(char *str) {",
        "char buffer[16];",
        "strcpy(buffer, str);",
        "}",
        "```",
    ]
)

# Conversation in the role/content dict format consumed by the chat template.
messages = [
    {"role": "system", "content": _SYSTEM_PROMPT},
    {"role": "user", "content": _USER_PROMPT},
]
print("\nProcessing chat template serialization...")
# Render the conversation to a single prompt string; add_generation_prompt=True
# appends the header that cues the model to start the assistant reply.
prompt_text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
)

# add_special_tokens=False: the rendered template text already carries the
# special tokens the template wants, so letting the tokenizer add its own
# again would duplicate them (e.g. a second BOS at the start of the prompt).
# The tensors are moved to wherever the model was actually placed rather than
# repeating a hard-coded device string.
model_inputs = tokenizer(
    prompt_text,
    return_tensors="pt",
    add_special_tokens=False,
).to(model.device)
input_ids = model_inputs["input_ids"]
attention_mask = model_inputs["attention_mask"]
prompt_length = input_ids.shape[-1]

# Prefer the tokenizer's own padding token; fall back to EOS only when the
# tokenizer does not define one.
pad_token_id = tokenizer.pad_token_id
if pad_token_id is None:
    pad_token_id = tokenizer.eos_token_id

print("Executing direct tensor generation with expanded token limits...")
with torch.no_grad():
    generated_ids = model.generate(
        input_ids=input_ids,
        # Passed explicitly so generate() does not have to guess the mask
        # from pad/eos token ids.
        attention_mask=attention_mask,
        max_new_tokens=1536,
        min_new_tokens=64,
        temperature=0.2,
        do_sample=True,
        pad_token_id=pad_token_id,
    )

# generate() returns prompt + continuation for each sequence; slice off the
# prompt so only the newly generated tokens are decoded.
response_tokens = generated_ids[0][prompt_length:]
response_text = tokenizer.decode(response_tokens, skip_special_tokens=True)

print("\n=== Trap Star Defense Output ===")
print(response_text)