#!/bin/bash
#
# Launch llama-server with the Nemotron-3-Super 120B (UD-Q4_K_XL GGUF) model.
#
# Usage: run this script directly; it takes no arguments.
# Requires: `llama-server` on PATH and the model shards at the path in MODEL.

set -euo pipefail

# Restrict the process to GPUs 6 and 7.
export CUDA_VISIBLE_DEVICES="6,7"

# First shard (00001-of-00003) of the split GGUF model.
# NOTE(review): presumably the remaining shards must sit in the same
# directory for llama-server to pick them up -- confirm.
readonly MODEL="/home/kongcunliang/workspace/pretrained-models/unsloth/NVIDIA-Nemotron-3-Super-120B-A12B-GGUF/UD-Q4_K_XL/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003.gguf"

# Fail early with a clear message instead of relying on the server's own error.
if [[ ! -f "$MODEL" ]]; then
  printf 'error: model file not found: %s\n' "$MODEL" >&2
  exit 1
fi

# Arguments are kept in an array so every value is passed as exactly one word.
server_args=(
  --model "$MODEL"
  --alias "nemotron-3-super"   # model name exposed to API clients
  --prio 1                     # process priority level passed to llama-server
  --min-p 0.01                 # sampling defaults: min-p, temperature, top-p
  --temp 0.6
  --top-p 0.95
  --ctx-size 1048576           # context size in tokens (1024 * 1024)
  # NOTE(review): 0.0.0.0 listens on all network interfaces and no API key
  # is configured here -- confirm this host is on a trusted network.
  --host 0.0.0.0
  --port 8001
)

llama-server "${server_args[@]}"