Gemma4ForConditionalGeneration(
(model): Gemma4Model(
(vision_tower): Gemma4VisionModel(
(patch_embedder): Gemma4VisionPatchEmbedder(
(input_proj): Linear(in_features=768, out_features=8, bias=False)
)
(encoder): Gemma4VisionEncoder(
(rotary_emb): Gemma4VisionRotaryEmbedding()
(layers): ModuleList(
(0-1): 2 x Gemma4VisionEncoderLayer(
(self_attn): Gemma4VisionAttention(
(q_proj): Gemma4ClippableLinear(
(linear): Linear(in_features=8, out_features=128, bias=False)
)
(k_proj): Gemma4ClippableLinear(
(linear): Linear(in_features=8, out_features=128, bias=False)
)
(v_proj): Gemma4ClippableLinear(
(linear): Linear(in_features=8, out_features=128, bias=False)
)
(o_proj): Gemma4ClippableLinear(
(linear): Linear(in_features=128, out_features=8, bias=False)
)
(q_norm): Gemma4RMSNorm()
(k_norm): Gemma4RMSNorm()
(v_norm): Gemma4RMSNorm()
)
(mlp): Gemma4VisionMLP(
(gate_proj): Gemma4ClippableLinear(
(linear): Linear(in_features=8, out_features=64, bias=False)
)
(up_proj): Gemma4ClippableLinear(
(linear): Linear(in_features=8, out_features=64, bias=False)
)
(down_proj): Gemma4ClippableLinear(
(linear): Linear(in_features=64, out_features=8, bias=False)
)
(act_fn): GELUTanh()
)
(input_layernorm): Gemma4RMSNorm()
(post_attention_layernorm): Gemma4RMSNorm()
(pre_feedforward_layernorm): Gemma4RMSNorm()
(post_feedforward_layernorm): Gemma4RMSNorm()
)
)
)
(pooler): Gemma4VisionPooler()
)
(language_model): Gemma4TextModel(
(embed_tokens): Gemma4TextScaledWordEmbedding(262144, 8, padding_idx=0)
(layers): ModuleList(
(0): Gemma4TextDecoderLayer(
(self_attn): Gemma4TextAttention(
(q_proj): Linear(in_features=8, out_features=256, bias=False)
(q_norm): Gemma4RMSNorm()
(k_norm): Gemma4RMSNorm()
(v_norm): Gemma4RMSNorm()
(k_proj): Linear(in_features=8, out_features=128, bias=False)
(v_proj): Linear(in_features=8, out_features=128, bias=False)
(o_proj): Linear(in_features=256, out_features=8, bias=False)
)
(mlp): Gemma4TextMLP(
(gate_proj): Linear(in_features=8, out_features=64, bias=False)
(up_proj): Linear(in_features=8, out_features=64, bias=False)
(down_proj): Linear(in_features=64, out_features=8, bias=False)
(act_fn): GELUTanh()
)
(input_layernorm): Gemma4RMSNorm()
(post_attention_layernorm): Gemma4RMSNorm()
(pre_feedforward_layernorm): Gemma4RMSNorm()
(post_feedforward_layernorm): Gemma4RMSNorm()
(router): Gemma4TextRouter(
(norm): Gemma4RMSNorm()
(proj): Linear(in_features=8, out_features=128, bias=False)
)
(experts): Gemma4TextExperts(
(act_fn): GELUTanh()
)
(post_feedforward_layernorm_1): Gemma4RMSNorm()
(post_feedforward_layernorm_2): Gemma4RMSNorm()
(pre_feedforward_layernorm_2): Gemma4RMSNorm()
)
(1): Gemma4TextDecoderLayer(
(self_attn): Gemma4TextAttention(
(q_proj): Linear(in_features=8, out_features=512, bias=False)
(q_norm): Gemma4RMSNorm()
(k_norm): Gemma4RMSNorm()
(v_norm): Gemma4RMSNorm()
(k_proj): Linear(in_features=8, out_features=128, bias=False)
(o_proj): Linear(in_features=512, out_features=8, bias=False)
)
(mlp): Gemma4TextMLP(
(gate_proj): Linear(in_features=8, out_features=64, bias=False)
(up_proj): Linear(in_features=8, out_features=64, bias=False)
(down_proj): Linear(in_features=64, out_features=8, bias=False)
(act_fn): GELUTanh()
)
(input_layernorm): Gemma4RMSNorm()
(post_attention_layernorm): Gemma4RMSNorm()
(pre_feedforward_layernorm): Gemma4RMSNorm()
(post_feedforward_layernorm): Gemma4RMSNorm()
(router): Gemma4TextRouter(
(norm): Gemma4RMSNorm()
(proj): Linear(in_features=8, out_features=128, bias=False)
)
(experts): Gemma4TextExperts(
(act_fn): GELUTanh()
)
(post_feedforward_layernorm_1): Gemma4RMSNorm()
(post_feedforward_layernorm_2): Gemma4RMSNorm()
(pre_feedforward_layernorm_2): Gemma4RMSNorm()
)
(2): Gemma4TextDecoderLayer(
(self_attn): Gemma4TextAttention(
(q_proj): Linear(in_features=8, out_features=256, bias=False)
(q_norm): Gemma4RMSNorm()
(k_norm): Gemma4RMSNorm()
(v_norm): Gemma4RMSNorm()
(k_proj): Linear(in_features=8, out_features=128, bias=False)
(v_proj): Linear(in_features=8, out_features=128, bias=False)
(o_proj): Linear(in_features=256, out_features=8, bias=False)
)
(mlp): Gemma4TextMLP(
(gate_proj): Linear(in_features=8, out_features=64, bias=False)
(up_proj): Linear(in_features=8, out_features=64, bias=False)
(down_proj): Linear(in_features=64, out_features=8, bias=False)
(act_fn): GELUTanh()
)
(input_layernorm): Gemma4RMSNorm()
(post_attention_layernorm): Gemma4RMSNorm()
(pre_feedforward_layernorm): Gemma4RMSNorm()
(post_feedforward_layernorm): Gemma4RMSNorm()
(router): Gemma4TextRouter(
(norm): Gemma4RMSNorm()
(proj): Linear(in_features=8, out_features=128, bias=False)
)
(experts): Gemma4TextExperts(
(act_fn): GELUTanh()
)
(post_feedforward_layernorm_1): Gemma4RMSNorm()
(post_feedforward_layernorm_2): Gemma4RMSNorm()
(pre_feedforward_layernorm_2): Gemma4RMSNorm()
)
(3): Gemma4TextDecoderLayer(
(self_attn): Gemma4TextAttention(
(q_proj): Linear(in_features=8, out_features=512, bias=False)
(q_norm): Gemma4RMSNorm()
(k_norm): Gemma4RMSNorm()
(v_norm): Gemma4RMSNorm()
(k_proj): Linear(in_features=8, out_features=128, bias=False)
(o_proj): Linear(in_features=512, out_features=8, bias=False)
)
(mlp): Gemma4TextMLP(
(gate_proj): Linear(in_features=8, out_features=64, bias=False)
(up_proj): Linear(in_features=8, out_features=64, bias=False)
(down_proj): Linear(in_features=64, out_features=8, bias=False)
(act_fn): GELUTanh()
)
(input_layernorm): Gemma4RMSNorm()
(post_attention_layernorm): Gemma4RMSNorm()
(pre_feedforward_layernorm): Gemma4RMSNorm()
(post_feedforward_layernorm): Gemma4RMSNorm()
(router): Gemma4TextRouter(
(norm): Gemma4RMSNorm()
(proj): Linear(in_features=8, out_features=128, bias=False)
)
(experts): Gemma4TextExperts(
(act_fn): GELUTanh()
)
(post_feedforward_layernorm_1): Gemma4RMSNorm()
(post_feedforward_layernorm_2): Gemma4RMSNorm()
(pre_feedforward_layernorm_2): Gemma4RMSNorm()
)
)
(norm): Gemma4RMSNorm()
(rotary_emb): Gemma4TextRotaryEmbedding()
)
(embed_vision): Gemma4MultimodalEmbedder(
(embedding_projection): Linear(in_features=8, out_features=8, bias=False)
(embedding_pre_projection_norm): Gemma4RMSNorm()
)
)
(lm_head): Linear(in_features=8, out_features=262144, bias=False)
)