[feat] support Megatron-LM training via mcore_adapter (#9237)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
This commit is contained in:
Kingsley
2025-10-26 16:21:30 +08:00
committed by GitHub
parent 129e918106
commit 13170577b2
14 changed files with 671 additions and 8 deletions

View File

@@ -54,6 +54,10 @@ def launch():
)
command = sys.argv.pop(1) if len(sys.argv) > 1 else "help"
if is_env_enabled("USE_MCA"):
# force use torchrun
os.environ["FORCE_TORCHRUN"] = "1"
if command == "train" and (is_env_enabled("FORCE_TORCHRUN") or (get_device_count() > 1 and not use_ray())):
# launch distributed training
nnodes = os.getenv("NNODES", "1")