deploy qwen3.5 and nemotron

2026-03-17 21:05:19 +08:00
parent 825c64c4dd
commit b48d522bb3
4 changed files with 4911 additions and 1 deletions
--- a/vllm-qwen3.5-27b.sh
+++ b/vllm-qwen3.5-27b.sh
@@ -0,0 +1,13 @@
+#! /bin/bash
+# Serve the local Qwen3.5-27B-FP8 checkpoint via `vllm serve` on port 8000 under the name "Qwen3.5-27B".
+export CUDA_VISIBLE_DEVICES="4,5"  # two device IDs listed, matching --tensor-parallel-size 2 below
+#export PYTHONWARNINGS="ignore::UserWarning:vllm.model_executor.layers.fla"
+MODEL="/home/kongcunliang/workspace/pretrained-models/Qwen/Qwen3.5-27B-FP8"
+# "$MODEL" is quoted so the path stays a single argument even if it ever contains spaces or glob characters.
+vllm serve "$MODEL" \
+  --served-model-name "Qwen3.5-27B" \
+  --port 8000 \
+  --tensor-parallel-size 2 \
+  --max-model-len 262144 \
+  --reasoning-parser qwen3
+# --speculative-config '{"method":"mtp","num_speculative_tokens":1}'