Dedicated Endpoints
Run inference for this model on single-tenant GPUs with unmatched speed and reliability at scale.
Get help setting up a custom Dedicated Endpoint.
Talk with our engineer to get a quote for reserved GPU instances with discounts.
README
License: apache-2.0
Training Script
py
# LoRA supervised fine-tuning script for Qwen/Qwen3.5-9B (Unsloth + TRL + Teich).
#
# Flow: load the base model -> optionally install a custom chat template ->
# attach LoRA adapters -> build and mask the dataset -> train for one epoch ->
# push the adapter and a merged 16-bit checkpoint to the Hugging Face Hub.

import os

# NOTE: unsloth is imported ahead of torch/trl, matching the original script;
# keep this order.
from unsloth import FastModel
import torch
from trl import SFTConfig, SFTTrainer
from teich import mask_data, prepare_data

# --- Run configuration -------------------------------------------------------
MAX_SEQ_LEN = 32768
MODEL_NAME = "Qwen/Qwen3.5-9B"
OUTPUT_DIR = "/content/drive/MyDrive/Colab/outputs-qwen-tool-sft"
HUB_REPO_ID = "armand0e/Qwen3.5-9B-Coder"
HF_TOKEN = os.environ.get("HF_TOKEN", "")  # empty string when the variable is unset
CHAT_TEMPLATE_PATH = "qwen3.5-chat-template.jinja"

# Dataset label -> source repository, handed to teich.prepare_data below.
DATASET_SOURCES = {
    "qwen3.7-max": {
        # NOTE (author): this source id is a typo, so the model was NOT actually
        # trained on the qwen3.7-max traces. Left as-is to document the real run.
        "source": "armand0e/qwen3.7-max",
    },
    "chat": {
        "source": "TeichAI/claude-4.5-opus-high-reasoning-250x",
    },
    "opus-pi-agent": {
        "source": "armand0e/badlogicgames-pi-mono-opus-filtered",
    },
    "kimi-k2.6-claude-code": {
        "source": "armand0e/kimi-k2.6-claude-code-traces",
    },
    "chat-2": {"source": "TeichAI/Claude-Opus-4.6-Reasoning-887x"},
    "minimax-m3-claude-code": {"source": "armand0e/minimax-m3-claude-code-traces"},
    "more-opus": {"source": "armand0e/claude-opus-4.8-pi-traces"},
}

# --- Base model --------------------------------------------------------------
# No 4-bit/8-bit loading and no full fine-tuning: adapters are added further down.
model, tokenizer = FastModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LEN,
    load_in_4bit=False,
    load_in_8bit=False,
    full_finetuning=False,
    token=HF_TOKEN,
)

# --- Optional custom chat template -------------------------------------------
if CHAT_TEMPLATE_PATH:
    with open(CHAT_TEMPLATE_PATH, encoding="utf-8") as template_file:
        chat_template = template_file.read()

    tokenizer.chat_template = chat_template

    # The loaded object may wrap an inner tokenizer (exposed as `.tokenizer`);
    # when it does, give the inner one the same template.
    inner_tokenizer = getattr(tokenizer, "tokenizer", None)
    if inner_tokenizer is not None:
        inner_tokenizer.chat_template = chat_template

# --- LoRA adapters -----------------------------------------------------------
model = FastModel.get_peft_model(
    model,
    finetune_vision_layers=False,    # off: this run is text-only
    finetune_language_layers=True,   # author's note: should stay on
    finetune_attention_modules=True,
    finetune_mlp_modules=True,       # author's note: should always stay on
    r=32,                            # larger = higher accuracy, but may overfit
    lora_alpha=32,                   # author's guidance: alpha >= r
    lora_dropout=0,
    bias="none",
    random_state=3407,
)

# --- Dataset -----------------------------------------------------------------
train_dataset = prepare_data(
    DATASET_SOURCES,
    tokenizer,
    split="train",
    hf_token=HF_TOKEN,
    chat_template_kwargs={"enable_thinking": False, "preserve_thinking": True},
    max_length=MAX_SEQ_LEN,
    oversized_policy="trim_followups",
    tokenize=True,
    strict=True,
)

# --- Trainer -----------------------------------------------------------------
# Effective batch size is 1 x 8 (per-device batch x gradient accumulation).
sft_config = SFTConfig(
    dataset_text_field="text",
    dataset_num_proc=1,
    max_length=MAX_SEQ_LEN,
    packing=False,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    warmup_steps=5,
    num_train_epochs=1,
    learning_rate=2e-4,
    logging_steps=1,
    save_strategy="epoch",
    save_total_limit=3,
    optim="adamw_8bit",
    weight_decay=0.01,
    # max_grad_norm=0.3 was left disabled by the author.
    lr_scheduler_type="linear",
    output_dir=OUTPUT_DIR,
    seed=3407,
    report_to="none",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=None,
    args=sft_config,
)

# Per the flag names: train on final answers and tool usage, not on reasoning.
trainer = mask_data(
    trainer,
    tokenizer=tokenizer,
    train_on_reasoning=False,
    train_on_final_answers=True,
    train_on_tools=True,
)

# Show a preview of the masked dataset before starting the run.
print(trainer.train_dataset.preview())

# --- Train and publish -------------------------------------------------------
trainer_stats = trainer.train(resume_from_checkpoint=False)

lora_repo_id = f"{HUB_REPO_ID}-LoRA"
model.push_to_hub(lora_repo_id, token=HF_TOKEN)
tokenizer.push_to_hub(lora_repo_id, token=HF_TOKEN)

model.push_to_hub_merged(
    HUB_REPO_ID,
    tokenizer,
    save_method="merged_16bit",
    token=HF_TOKEN,
)
The data for this model was easily formatted and masked with Teich
- Developed by: armand0e
- License: apache-2.0
- Finetuned from model: Qwen/Qwen3.5-9B
This qwen3_5 model was trained 2x faster with Unsloth and Hugging Face's TRL library.
Model provider
armand0e
Model tree
Base
Qwen/Qwen3.5-9B
Fine-tuned
this model
Modalities
Input
Video, Text, Image
Output
Text
Pricing
Dedicated Endpoints
View details
Supported Functionality
Model APIs
Dedicated Endpoints
Container
More information