Files
models-deploy/llamacpp-nemotron-super.sh
2026-03-21 17:04:05 +08:00

15 lines
382 B
Bash

#!/bin/bash
# Launch settings consumed by the llama-server invocation that follows.

# Expose only GPU indices 6 and 7; the export lets the llama-server child
# process inherit the setting.
CUDA_VISIBLE_DEVICES="6,7"
export CUDA_VISIBLE_DEVICES

# MODEL is handed to `llama-server --model`. Judging by the file name it is
# shard 00001 of a 3-part split GGUF (UD-Q4_K_XL quantization).
gguf_dir="/home/kongcunliang/workspace/pretrained-models/unsloth/NVIDIA-Nemotron-3-Super-120B-A12B-GGUF/UD-Q4_K_XL"
MODEL="${gguf_dir}/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003.gguf"
llama-server \
--model $MODEL \
--alias "nemotron-3-super" \
--temp 0.6 \
--top-p 0.95 \
--ctx-size 262144 \
--host 0.0.0.0 \
--port 8001 \