mirror of
https://github.com/hiyouga/LlamaFactory.git
synced 2026-03-23 10:43:22 +08:00
[feat] support megatron-LM training by mcore_adapter (#9237)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
This commit is contained in:
@@ -54,6 +54,10 @@ def launch():
|
||||
)
|
||||
|
||||
command = sys.argv.pop(1) if len(sys.argv) > 1 else "help"
|
||||
if is_env_enabled("USE_MCA"):
|
||||
# force use torchrun
|
||||
os.environ["FORCE_TORCHRUN"] = "1"
|
||||
|
||||
if command == "train" and (is_env_enabled("FORCE_TORCHRUN") or (get_device_count() > 1 and not use_ray())):
|
||||
# launch distributed training
|
||||
nnodes = os.getenv("NNODES", "1")
|
||||
|
||||
Reference in New Issue
Block a user