#!/bin/bash
#
# Launch an SGLang server for the Qwen3.5-27B FP8 checkpoint.
#
# Usage: run this script directly; it takes no arguments.
#
# Environment set by this script (any inherited values are overwritten):
#   CUDA_VISIBLE_DEVICES    - restricts the process to GPUs 4,5,6,7.
#   SGLANG_ENABLE_SPEC_V2   - set to 1. NOTE(review): presumably selects
#                             SGLang's v2 speculative-decoding path; confirm
#                             against the installed SGLang version.

# Fail fast: abort on command errors, unset variables and pipeline failures.
set -euo pipefail

export CUDA_VISIBLE_DEVICES=4,5,6,7
export SGLANG_ENABLE_SPEC_V2=1

# Local path of the checkpoint passed to --model-path.
MODEL="/home/kongcunliang/workspace/pretrained-models/Qwen/Qwen3.5-27B-FP8"

# Report a missing checkpoint up front instead of failing during server start.
if [[ ! -e "$MODEL" ]]; then
  printf 'error: model path not found: %s\n' "$MODEL" >&2
  exit 1
fi

# The command line is built as an array so every argument stays a single word
# and no trailing-backslash continuations are needed.
serve_args=(
  # Model and network endpoint.
  --model-path "$MODEL"
  --host 0.0.0.0
  --port 8000

  # Four-way parallelism; matches the four GPUs listed in CUDA_VISIBLE_DEVICES.
  --tp-size 4
  --mem-fraction-static 0.8
  --context-length 262144
  --reasoning-parser qwen3

  # Speculative decoding settings.
  --speculative-algo NEXTN
  --speculative-num-steps 3
  --speculative-eagle-topk 1
  --speculative-num-draft-tokens 4

  # Scheduling limits and the model name reported to clients.
  --mamba-scheduler-strategy extra_buffer
  --max-running-requests 192
  --served-model-name Qwen3.5-27B
)

sglang serve "${serve_args[@]}"