change to sglang
This commit is contained in:
@@ -8,6 +8,7 @@ vllm serve $MODEL \
|
||||
--served-model-name "Qwen3.5-27B" \
|
||||
--port 8000 \
|
||||
--tensor-parallel-size 2 \
|
||||
--max-model-len 262144 \
|
||||
--reasoning-parser qwen3
|
||||
# --speculative-config '{"method":"mtp","num_speculative_tokens":1}'
|
||||
--max-model-len 16384 \
|
||||
--reasoning-parser qwen3 \
|
||||
--speculative-config '{"method":"mtp","num_speculative_tokens":1}'
|
||||
# --enable-prefix-caching \
|
||||
|
||||
Reference in New Issue
Block a user