[model] update kt code (#9406)

This commit is contained in:
Yaowei Zheng
2025-11-05 15:27:22 +08:00
committed by GitHub
parent 56f45e826f
commit eaf963f67f
28 changed files with 108 additions and 68 deletions

View File

@@ -57,7 +57,9 @@ def launch():
if is_env_enabled("USE_MCA"): # force use torchrun
os.environ["FORCE_TORCHRUN"] = "1"
-if command == "train" and (is_env_enabled("FORCE_TORCHRUN") or (get_device_count() > 1 and not use_ray() and not use_kt())):
+if command == "train" and (
+is_env_enabled("FORCE_TORCHRUN") or (get_device_count() > 1 and not use_ray() and not use_kt())
+):
# launch distributed training
nnodes = os.getenv("NNODES", "1")
node_rank = os.getenv("NODE_RANK", "0")