Change to SGLang

2026-03-21 17:04:05 +08:00
parent b48d522bb3
commit bd67fc088f
6 changed files with 2468 additions and 1534 deletions
--- a/vllm-qwen3.5-27b.sh
+++ b/vllm-qwen3.5-27b.sh
@@ -8,6 +8,7 @@ vllm serve $MODEL \
  --served-model-name "Qwen3.5-27B" \
  --port 8000 \
  --tensor-parallel-size 2 \
-  --max-model-len 262144 \
-  --reasoning-parser qwen3
-# --speculative-config '{"method":"mtp","num_speculative_tokens":1}'
+  --max-model-len 16384 \
+  --reasoning-parser qwen3 \
+  --speculative-config '{"method":"mtp","num_speculative_tokens":1}'
+# --enable-prefix-caching \