# Launch llama-server with the Qwen3.6 27B q8_0 GGUF model.
#
# NOTE: each trailing backslash must be the LAST character on its line
# (no trailing spaces). If the command is joined onto one line, the
# backslashes must be removed: "\ " is an escaped space, which would glue a
# literal leading space onto the next option and make it unrecognizable.
#
# Options, as passed below (meanings per llama-server's --help; confirm
# against the build in use):
#   -m PATH                   model file to load
#   --no-mmap                 do not memory-map the model file
#   --n-gpu-layers all        offload all layers to the GPU
#   --ctx-size 131072         context size, in tokens
#   --flash-attn on           enable flash attention
#   --cache-type-k/-v q8_0    q8_0 data type for the K and V caches
#   --jinja                   use the Jinja chat template
#   --no-mmproj               do not load a multimodal projector
#   --parallel 1              a single server slot
#   --cache-ram 4096          prompt-cache RAM limit (presumably MiB)
#   -ctxcp 2                  context checkpoints (presumably per slot)
#   --reasoning on            enable reasoning/thinking output
#   --chat-template-kwargs    JSON object forwarded to the chat template
build/bin/llama-server \
  -m ~/models/llm/qwen3.6-27b/qwen3.6-27B-q8_0.gguf \
  --no-mmap \
  --n-gpu-layers all \
  --ctx-size 131072 \
  --flash-attn on \
  --cache-type-k q8_0 \
  --cache-type-v q8_0 \
  --jinja \
  --no-mmproj \
  --parallel 1 \
  --cache-ram 4096 \
  -ctxcp 2 \
  --reasoning on \
  --chat-template-kwargs '{"preserve_thinking": true}'
# Observed memory breakdown (units presumably MiB — confirm): self = model + context + compute -> 30968 = 25972 + 4501 + 495