Architecture
{"total_parameters": 456000000000, "activated_parameters_per_token": 45900000000, "number_of_layers": 80, "attention_mechanism": {"type": "hybrid", "components": ["Lightning Attention", "Softmax Attention"], "configuration": {"lightning_attention_layers": 7, "softmax_attention_layers": 1, "attention_heads": 64, "attention_head_dimension": 128}}, "mixture_of_experts": {"number_of_experts": 32, "expert_hidden_dimension": 9216, "routing_strategy": "Top-2"}, "positional_encoding": {"type": "Rotary Position Embedding (RoPE)", "base_frequency": 10000000}, "hidden_size": 6144, "vocabulary_size": 200064}