change to sglang

2026-03-21 17:04:05 +08:00
parent b48d522bb3
commit bd67fc088f
6 changed files with 2468 additions and 1534 deletions
--- a/sglang_qwen3.5.sh
+++ b/sglang_qwen3.5.sh
@@ -0,0 +1,29 @@
+#! /bin/bash
+# Launch an SGLang server for the local Qwen3.5-27B-FP8 checkpoint.
+# Passes host 0.0.0.0 and port 8000, and serves the model under the name Qwen3.5-27B.
+# Usage: bash sglang_qwen3.5.sh
+
+# Four GPU ids are exposed here - keep the count equal to --tp-size below.
+export CUDA_VISIBLE_DEVICES=4,5,6,7
+# NOTE review: presumably opts in to the v2 speculative decoding path - confirm against the SGLang docs.
+export SGLANG_ENABLE_SPEC_V2=1
+
+# Checkpoint directory handed to --model-path.
+MODEL="/home/kongcunliang/workspace/pretrained-models/Qwen/Qwen3.5-27B-FP8"
+
+# MODEL is quoted so a path containing spaces or glob characters stays a single argument.
+sglang serve \
+  --model-path "$MODEL" \
+  --host 0.0.0.0 \
+  --port 8000 \
+  --tp-size 4 \
+  --mem-fraction-static 0.8 \
+  --context-length 262144 \
+  --reasoning-parser qwen3 \
+  --speculative-algo NEXTN \
+  --speculative-num-steps 3 \
+  --speculative-eagle-topk 1 \
+  --speculative-num-draft-tokens 4 \
+  --mamba-scheduler-strategy extra_buffer \
+  --max-running-requests 192 \
+  --served-model-name Qwen3.5-27B