deploy qwen3.5 and nemotron

2026-03-17 21:05:19 +08:00
parent 825c64c4dd
commit b48d522bb3
4 changed files with 4911 additions and 1 deletions
--- a/llamacpp-nemotron-super.sh
+++ b/llamacpp-nemotron-super.sh
@@ -0,0 +1,16 @@
+#! /bin/bash
+# Launch llama-server for the Nemotron-3 Super GGUF model on port 8001.
+export CUDA_VISIBLE_DEVICES="6,7"  # expose only GPUs 6 and 7 to CUDA
+# First shard (00001-of-00003) of the split UD-Q4_K_XL GGUF.
+MODEL="/home/kongcunliang/workspace/pretrained-models/unsloth/NVIDIA-Nemotron-3-Super-120B-A12B-GGUF/UD-Q4_K_XL/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003.gguf"
+# "$MODEL" is quoted so the path always reaches llama-server as one argument.
+llama-server \
+  --model "$MODEL" \
+  --alias "nemotron-3-super" \
+  --prio 1 \
+  --min-p 0.01 \
+  --temp 0.6 \
+  --top-p 0.95 \
+  --ctx-size 1048576 \
+  --host 0.0.0.0 \
+  --port 8001