#!/bin/bash
#
# Launch llama-server serving the Nemotron-3-Super 120B GGUF model.
#
# Usage: run this script directly; it takes no arguments.
# Requires: `llama-server` on PATH and the model file at the path below.

# Abort on command failure, unset variables, and failed pipeline stages.
set -euo pipefail

# Exported so the llama-server child process inherits it; limits the
# CUDA devices visible to the server to indices 6 and 7.
export CUDA_VISIBLE_DEVICES="6,7"

# Path handed to --model. This is the first part (00001-of-00003) of a
# split GGUF file; the remaining parts are presumably expected alongside
# it in the same directory -- confirm against the llama.cpp docs.
readonly MODEL="/home/kongcunliang/workspace/pretrained-models/unsloth/NVIDIA-Nemotron-3-Super-120B-A12B-GGUF/UD-Q4_K_XL/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003.gguf"

# Arguments are kept in an array so every value reaches llama-server as
# exactly one word, with no line-continuation backslashes to get wrong.
server_args=(
  --model "$MODEL"
  --alias "nemotron-3-super"
  --temp 0.6
  --top-p 0.95
  --ctx-size 262144
  # 0.0.0.0 is the wildcard address, i.e. not restricted to localhost.
  --host 0.0.0.0
  --port 8001
)

llama-server "${server_args[@]}"