{
  "engine": "llama.cpp",
  "recommended_endpoint_settings": {
    "max_tokens_per_request": 1024,
    "max_concurrent_requests": 2,
    "notes": "Memory scales roughly with (max_concurrent_requests * max_tokens_per_request)."
  },
  "recommended_generation_defaults": {
    "temperature": 1.2,
    "top_p": 0.95,
    "min_p": 0.05,
    "repeat_penalty": 1.08,
    "max_tokens": 2560
  },
  "chat_template": "phi4",
  "gguf_file": "",
  "gguf_quant": "q8_0"
}