Dedicated Endpoints

Run inference for this model on a single-tenant GPU with unmatched speed and reliability at scale.

Learn more

Get help setting up custom Dedicated Endpoints.

Talk with our engineers to get a quote for reserved GPU instances with discounts.

README

License: apache-2.0

Training Script

py

import os
from unsloth import FastModel
import torch
from trl import SFTConfig, SFTTrainer
from teich import mask_data, prepare_data
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------

# Token budget shared by model loading, dataset preparation and the trainer.
MAX_SEQ_LEN = 32 * 1024

# Base checkpoint that the LoRA adapters are trained on top of.
MODEL_NAME = "Qwen/Qwen3.5-9B"

# Where the trainer writes its checkpoints (a mounted Google Drive folder).
OUTPUT_DIR = "/content/drive/MyDrive/Colab/outputs-qwen-tool-sft"

# Hub repository for the merged model; the adapter repo appends "-LoRA".
HUB_REPO_ID = "armand0e/Qwen3.5-9B-Coder"

# Hugging Face access token, read from the environment; empty string if unset.
HF_TOKEN = os.getenv("HF_TOKEN", "")

# Jinja chat template loaded over the tokenizer's default; set to "" to skip.
CHAT_TEMPLATE_PATH = "qwen3.5-chat-template.jinja"
# Load the base checkpoint and its tokenizer through Unsloth.
# Both quantized-loading flags and full fine-tuning are switched off here;
# adapters are attached to the model in a later step.
model, tokenizer = FastModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LEN,
    load_in_4bit=False,
    load_in_8bit=False,
    full_finetuning=False,
    token=HF_TOKEN,
)
# Optionally replace the tokenizer's chat template with a custom Jinja file.
# Skipped entirely when CHAT_TEMPLATE_PATH is empty.
if CHAT_TEMPLATE_PATH:
    with open(CHAT_TEMPLATE_PATH, "r", encoding="utf-8") as f:
        custom_chat_template = f.read()

    tokenizer.chat_template = custom_chat_template

    # The loaded object may wrap an inner tokenizer (exposed as `.tokenizer`);
    # when it does, keep the inner template in sync with the outer one.
    if hasattr(tokenizer, "tokenizer") and tokenizer.tokenizer is not None:
        tokenizer.tokenizer.chat_template = custom_chat_template
# Wrap the base model with LoRA adapters (text-only fine-tune).
model = FastModel.get_peft_model(
    model,
    # Which parts of the network receive adapters.
    finetune_vision_layers=False,  # vision layers excluded: text-only training
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    # Adapter hyper-parameters.
    r=32,  # adapter rank
    lora_alpha=32,  # kept equal to the rank
    lora_dropout=0,
    bias="none",
    random_state=3407,  # same seed value as the trainer config
)
# Named dataset sources handed to Teich's `prepare_data`.
# Keys are local labels; each "source" is a Hugging Face Hub dataset repo id.
dataset_sources = {
    # NOTE: the author flagged this repo id as a typo; because of it, the
    # published model was NOT trained on the qwen3.7-max traces.
    "qwen3.7-max": {"source": "armand0e/qwen3.7-max"},
    "chat": {"source": "TeichAI/claude-4.5-opus-high-reasoning-250x"},
    "opus-pi-agent": {"source": "armand0e/badlogicgames-pi-mono-opus-filtered"},
    "kimi-k2.6-claude-code": {"source": "armand0e/kimi-k2.6-claude-code-traces"},
    "chat-2": {"source": "TeichAI/Claude-Opus-4.6-Reasoning-887x"},
    "minimax-m3-claude-code": {"source": "armand0e/minimax-m3-claude-code-traces"},
    "more-opus": {"source": "armand0e/claude-opus-4.8-pi-traces"},
}

# Build the tokenized training split from all sources, rendered with the
# tokenizer's chat template and capped at MAX_SEQ_LEN tokens.
train_dataset = prepare_data(
    dataset_sources,
    tokenizer,
    split="train",
    hf_token=HF_TOKEN,
    chat_template_kwargs={"enable_thinking": False, "preserve_thinking": True},
    max_length=MAX_SEQ_LEN,
    oversized_policy="trim_followups",  # policy applied to over-length samples
    tokenize=True,
    strict=True,
)
# Training hyper-parameters for the supervised fine-tuning run.
sft_config = SFTConfig(
    # Data handling
    dataset_text_field="text",
    dataset_num_proc=1,
    max_length=MAX_SEQ_LEN,
    packing=False,
    # Batching: 1 sample per device x 8 accumulation steps
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    # Schedule and optimizer
    warmup_steps=5,
    num_train_epochs=1,
    learning_rate=2e-4,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    # max_grad_norm is deliberately not overridden here
    # (a 0.3 override was left disabled in the original script).
    # Logging and checkpointing
    logging_steps=1,
    save_strategy="epoch",
    save_total_limit=3,
    output_dir=OUTPUT_DIR,
    report_to="none",
    seed=3407,
)

# Trainer over the prepared dataset; no evaluation set is used.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=None,
    args=sft_config,
)
# Apply Teich's masking to the trainer. Per the flag names, reasoning spans
# are excluded while final answers and tool content are trained on
# (exact masking semantics are defined by Teich -- confirm against its docs).
trainer = mask_data(
    trainer,
    tokenizer=tokenizer,
    train_on_reasoning=False,
    train_on_final_answers=True,
    train_on_tools=True,
)
# Print a preview of the masked training data before starting the run.
print(trainer.train_dataset.preview())

# Train from scratch (do not resume from an existing checkpoint).
trainer_stats = trainer.train(resume_from_checkpoint=False)

# Publish the adapter weights and tokenizer to "<HUB_REPO_ID>-LoRA" ...
lora_repo_id = f"{HUB_REPO_ID}-LoRA"
model.push_to_hub(lora_repo_id, token=HF_TOKEN)
tokenizer.push_to_hub(lora_repo_id, token=HF_TOKEN)

# ... and the merged 16-bit model to the main repository.
model.push_to_hub_merged(
    HUB_REPO_ID,
    tokenizer,
    save_method="merged_16bit",
    token=HF_TOKEN,
)

The data for this model was easily formatted and masked with Teich

  • Developed by: armand0e
  • License: apache-2.0
  • Finetuned from model: Qwen/Qwen3.5-9B

This qwen3_5 model was trained 2x faster with Unsloth and Hugging Face's TRL library.

Model provider

armand0e

Model tree

Base

Qwen/Qwen3.5-9B

Fine-tuned

this model

Modalities

Input

Video, Text, Image

Output

Text

Pricing

Dedicated Endpoints

View details

Supported Functionality

Model APIs

Dedicated Endpoints

Container

More information

Explore FriendliAI today