[2024-08-09 17:29:22,420] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to mps (auto detect)
[2024-08-09 17:29:22,567] torch.distributed.elastic.multiprocessing.redirects: [WARNING] NOTE: Redirects are currently not supported in Windows or MacOs.
[2024-08-09 17:29:23,636] [INFO] [comm.py:637:init_distributed] cdb=None
[2024-08-09 17:29:23,636] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
Traceback (most recent call last):
File "/Users/rufuslee/Downloads/Llama-Chinese-main/train/sft/finetune_clm_lora.py", line 694, in
main()
File "/Users/rufuslee/Downloads/Llama-Chinese-main/train/sft/finetune_clm_lora.py", line 281, in main
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/transformers/hf_argparser.py", line 338, in parse_args_into_dataclasses
obj = dtype(**inputs)
^^^^^^^^^^^^^^^
File "", line 124, in init
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/transformers/training_args.py", line 1551, in post_init
and (self.device.type != "cuda")
^^^^^^^^^^^
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/transformers/training_args.py", line 2028, in device
return self._setup_devices
^^^^^^^^^^^^^^^^^^^
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/transformers/utils/generic.py", line 63, in get
cached = self.fget(obj)
^^^^^^^^^^^^^^
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/transformers/training_args.py", line 1959, in _setup_devices
self.distributed_state = PartialState(timeout=timedelta(seconds=self.ddp_timeout))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/accelerate/state.py", line 190, in init
dist.init_distributed(dist_backend=self.backend, auto_mpi_discovery=False, **kwargs)
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/deepspeed/comm/comm.py", line 670, in init_distributed
cdb = TorchBackend(dist_backend, timeout, init_method, rank, world_size)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/deepspeed/comm/torch.py", line 121, in init
self.init_process_group(backend, timeout, init_method, rank, world_size)
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/deepspeed/comm/torch.py", line 149, in init_process_group
torch.distributed.init_process_group(backend,
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", line 74, in wrapper
func_return = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", line 1148, in init_process_group
default_pg, _ = _new_process_group_helper(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/rufuslee/Downloads/Llama-Chinese-main/tunevenv/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", line 1268, in _new_process_group_helper
raise RuntimeError("Distributed package doesn't have NCCL built in")
RuntimeError: Distributed package doesn't have NCCL built in
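For context on the failure: DeepSpeed detects the `mps` accelerator but still initializes the default `nccl` process-group backend, and the macOS PyTorch wheels are not built with NCCL, so `init_process_group` raises. A minimal diagnostic sketch (my own, not part of the repro above) to confirm which backends the local build actually supports:

```python
# Check which torch.distributed backends this PyTorch build supports.
# On macOS wheels, NCCL is typically not built in; gloo usually is.
import torch
import torch.distributed as dist

print("CUDA available:", torch.cuda.is_available())
print("MPS available :", torch.backends.mps.is_available())
print("NCCL available:", dist.is_nccl_available())  # expected False on macOS
print("Gloo available:", dist.is_gloo_available())  # usually True
```

If gloo is the only backend reported, the distributed init would have to use gloo (or the script would need to run without DeepSpeed's distributed launcher) rather than nccl; this is a guess about the setup, not a confirmed fix.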