Uploaded finetuned model
- Developed by: epispasm
- License: apache-2.0
- Finetuned from model : oldhag88/qwen3.5-9b-nsfw-captioning-v5
This qwen3_5 model was trained 2x faster with Unsloth and Huggingface's TRL library.
This model needs a patch. For some reason, it may not work with LM Studio because of a tensor-loading error. I will patch it when I have time.
"""
patch_gguf_qwen36.py
Fixes the Qwen3.5/3.6 GGUF "missing tensor 'blk.N.attn_norm.weight'" load error.
The exporter declares an extra MTP (multi-token prediction) block in GGUF
metadata but doesn't write its tensors, so llama.cpp / LM Studio refuse to
load the file. This patch updates the metadata to match what's actually
written: block_count = real layer count, nextn_predict_layers = 0.
For Qwen3.6-27B the real text layer count is 64.
For Qwen3.5-9B (the previous workflow) it was 32.
If the script prints unexpected values when patching, run the
override-kv test (see the skill) to find the right number, then edit
EXPECTED_BLOCK_COUNT below before re-patching.
Usage:
python patch_gguf_qwen36.py
"""
import struct
from pathlib import Path
# ------ EDIT THESE IF NEEDED ------------------------------------------------
# Folder where save_pretrained_gguf wrote the files
GGUF_DIR = Path(r"G:\models\qwen3.6-27b-epispasm_v1")
# The real text-layer count for Qwen3.6-27B (per model card: 64 layers).
# Change to 32 for Qwen3.5-9B, 64 for Qwen3.6-27B, etc.
EXPECTED_BLOCK_COUNT = 64
# ----------------------------------------------------------------------------
def patch_uint32_kv(data: bytearray, key_str: str, new_value: int) -> None:
    """Find a uint32 GGUF metadata key by name and rewrite its value in-place.

    The key is located by its full on-disk encoding (uint64 little-endian
    byte length followed by the UTF-8 key bytes, as used by GGUF v2 and
    later) rather than by a bare substring search, so a longer key that
    merely starts with ``key_str`` is never mistaken for it. The uint32
    type tag that follows the key must equal the GGUF UINT32 type id;
    otherwise the candidate is ignored and the search continues.

    Args:
        data: Mutable buffer holding the GGUF file contents; modified in place.
        key_str: Exact metadata key name, e.g. ``"qwen35.block_count"``.
        new_value: Replacement value; must fit in an unsigned 32-bit integer
            (``struct.error`` is raised by ``struct.pack_into`` otherwise).

    Returns:
        None. A one-line status message is printed in every case; when the
        key is absent or is not stored as a uint32 the buffer is left
        untouched.
    """
    key_bytes = key_str.encode("utf-8")
    # Anchor on the length prefix so we only match a complete key string.
    needle = struct.pack("<Q", len(key_bytes)) + key_bytes
    uint32_type_id = 4  # GGUF metadata value type id for UINT32
    saw_wrong_type = False

    search_from = 0
    while True:
        pos = data.find(needle, search_from)
        if pos < 0:
            break
        tag_pos = pos + len(needle)
        val_pos = tag_pos + 4  # skip the uint32 type-tag field that follows the key
        if val_pos + 4 <= len(data):
            if struct.unpack_from("<I", data, tag_pos)[0] == uint32_type_id:
                old = struct.unpack_from("<I", data, val_pos)[0]
                struct.pack_into("<I", data, val_pos, new_value)
                print(f" {key_str}: {old} -> {new_value}")
                return
            saw_wrong_type = True
        # False positive (wrong type or truncated data): keep scanning.
        search_from = pos + 1

    if saw_wrong_type:
        print(f" [skip] key is not a uint32: {key_str}")
    else:
        print(f" [skip] key not found: {key_str}")
def patch_file(in_path: Path) -> Path:
    """Patch one GGUF file. Writes a sibling file with '_fixed' before '.gguf'.

    The input file is never modified. It is first copied on disk to the
    output path, and the copy is then patched through a memory map so the
    (potentially multi-gigabyte) model never has to be held in RAM in full.

    Args:
        in_path: Path of the GGUF file to patch.

    Returns:
        Path of the patched sibling file.

    Raises:
        OSError: If the file cannot be copied, opened or mapped. Any
            partially written output file is removed before re-raising.
    """
    # Function-scope imports keep this block self-contained.
    import mmap
    import shutil

    out_path = in_path.with_name(in_path.stem + "_fixed" + in_path.suffix)
    print(f"\nPatching: {in_path.name}")

    shutil.copyfile(in_path, out_path)
    try:
        with open(out_path, "r+b") as fh, mmap.mmap(fh.fileno(), 0) as data:
            # mmap supports find(), slicing and the writable buffer protocol,
            # which is everything patch_uint32_kv needs from its buffer.
            patch_uint32_kv(data, "qwen35.block_count", EXPECTED_BLOCK_COUNT)
            patch_uint32_kv(data, "qwen35.nextn_predict_layers", 0)
            data.flush()
    except BaseException:
        # Don't leave an unpatched file around that looks like a fixed one.
        try:
            out_path.unlink()
        except FileNotFoundError:
            pass
        raise

    print(f" wrote: {out_path.name} ({out_path.stat().st_size / 1e9:.2f} GB)")
    return out_path
def main() -> None:
    """Patch every main-model GGUF file found directly inside GGUF_DIR.

    Exits via SystemExit when GGUF_DIR is missing or contains no eligible
    files. Files whose name contains "mmproj" and files whose stem contains
    "_fixed" (case-insensitive) are left alone.
    """
    if not GGUF_DIR.exists():
        raise SystemExit(f"GGUF_DIR does not exist: {GGUF_DIR}")

    # Collect every *.gguf except the mmproj (vision encoder doesn't have
    # this issue) and outputs of a previous run.
    to_patch = []
    for gguf in GGUF_DIR.glob("*.gguf"):
        is_mmproj = "mmproj" in gguf.name.lower()
        already_fixed = "_fixed" in gguf.stem.lower()
        if is_mmproj or already_fixed:
            continue
        to_patch.append(gguf)

    if len(to_patch) == 0:
        message = (
            f"No main-model .gguf files found in {GGUF_DIR}\n"
            f"(looking for files like *.Q4_K_M.gguf, excluding mmproj and _fixed)"
        )
        raise SystemExit(message)

    # List everything up front, then do the actual work.
    print(f"Found {len(to_patch)} file(s) to patch:")
    for gguf in to_patch:
        print(f" - {gguf.name}")

    for gguf in to_patch:
        patch_file(gguf)

    print("\nDone. Use the *_fixed.gguf files with LM Studio.")
    print("Delete the originals once you've confirmed the patched ones load.")
# Run the patcher only when executed as a script, not when imported.
if __name__ == "__main__":
    main()