change to sglang

This commit is contained in:
2026-03-21 17:04:05 +08:00
parent b48d522bb3
commit bd67fc088f
6 changed files with 2468 additions and 1534 deletions

View File

@@ -8,6 +8,7 @@ vllm serve $MODEL \
 --served-model-name "Qwen3.5-27B" \
 --port 8000 \
 --tensor-parallel-size 2 \
---max-model-len 262144 \
---reasoning-parser qwen3
-# --speculative-config '{"method":"mtp","num_speculative_tokens":1}'
+--max-model-len 16384 \
+--reasoning-parser qwen3 \
+--speculative-config '{"method":"mtp","num_speculative_tokens":1}'
+# --enable-prefix-caching \