From 14329c2ec5e90014315ef361cb37ae2f12e3ca66 Mon Sep 17 00:00:00 2001 From: yyswhsccc Date: Sun, 17 May 2026 12:58:16 -0600 Subject: [PATCH 1/3] Stabilize vLLM TP rollout all-reduce --- swift/megatron/trainers/rollout_mixin.py | 5 +++-- swift/rlhf_trainers/rollout_mixin.py | 5 +++-- swift/utils/__init__.py | 5 +++-- swift/utils/env.py | 9 ++++++++ tests/utils/test_vllm_env.py | 27 ++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 tests/utils/test_vllm_env.py diff --git a/swift/megatron/trainers/rollout_mixin.py b/swift/megatron/trainers/rollout_mixin.py index 41072b6f75..7740ad0613 100644 --- a/swift/megatron/trainers/rollout_mixin.py +++ b/swift/megatron/trainers/rollout_mixin.py @@ -28,8 +28,8 @@ check_vllm_version_ge, expand_vllm_param_name_aliases, patch_vllm_load_adapter, patch_vllm_moe_model_weight_loader, profiling_context, profiling_decorator, set_expandable_segments, vllm_supports_lora_load_inplace) -from swift.utils import (get_current_device, get_logger, is_last_rank, is_vllm_available, remove_response, synchronize, - to_device) +from swift.utils import (configure_vllm_allreduce_env, get_current_device, get_logger, is_last_rank, is_vllm_available, + remove_response, synchronize, to_device) from .utils import (gather_object, load_megatron_model_to_gpu, load_megatron_optimizer, offload_megatron_model_to_cpu, offload_megatron_optimizer) @@ -246,6 +246,7 @@ def _init_rollout_engine(self): def _prepare_vllm_engine(self): """Create and configure vLLM engine for colocate mode.""" + configure_vllm_allreduce_env(self.vllm_tensor_parallel_size) from vllm.distributed import parallel_state as vllm_ps from swift.infer_engine import GRPOVllmEngine diff --git a/swift/rlhf_trainers/rollout_mixin.py b/swift/rlhf_trainers/rollout_mixin.py index f4ea58e1d0..3d553b7260 100644 --- a/swift/rlhf_trainers/rollout_mixin.py +++ b/swift/rlhf_trainers/rollout_mixin.py @@ -33,8 +33,8 @@ from swift.sequence_parallel import sequence_parallel from swift.template import Template from swift.tuners import Swift -from swift.utils import (get_current_device, get_logger, is_deepspeed_enabled, is_vllm_available, remove_response, - to_device) +from swift.utils import (configure_vllm_allreduce_env, get_current_device, get_logger, is_deepspeed_enabled, + is_vllm_available, remove_response, to_device) from .arguments import RolloutTrainerArgumentsMixin from .rlhf_mixin import RLHFTrainerMixin from .utils import (VLLM_LORA_INT_ID, VLLM_LORA_NAME, VLLM_LORA_PATH, FlattenedTensorBucket, TensorLoRARequest, @@ -257,6 +257,7 @@ def _prepare_vllm(self): def _prepare_vllm_engine(self): """Create and configure vLLM engine for colocate mode""" + configure_vllm_allreduce_env(self.vllm_tensor_parallel_size) from swift.infer_engine import GRPOVllmEngine args = self.args model = self.model diff --git a/swift/utils/__init__.py b/swift/utils/__init__.py index 522f6a0904..8b1e02dd60 100644 --- a/swift/utils/__init__.py +++ b/swift/utils/__init__.py @@ -1,7 +1,8 @@ # Copyright (c) ModelScope Contributors. All rights reserved. -from .env import (get_dist_setting, get_hf_endpoint, get_node_setting, get_pai_tensorboard_dir, is_deepspeed_enabled, - is_dist, is_last_rank, is_local_master, is_master, is_mp, is_mp_ddp, is_pai_training_job, use_hf_hub) +from .env import (configure_vllm_allreduce_env, get_dist_setting, get_hf_endpoint, get_node_setting, + get_pai_tensorboard_dir, is_deepspeed_enabled, is_dist, is_last_rank, is_local_master, is_master, + is_mp, is_mp_ddp, is_pai_training_job, use_hf_hub) from .hf_config import HfConfigFactory from .hub_utils import download_ms_file, git_clone_github, safe_snapshot_download from .import_utils import (is_flash_attn_2_available, is_flash_attn_3_available, is_liger_available, diff --git a/swift/utils/env.py b/swift/utils/env.py index 817a529539..e96b613202 100644 --- a/swift/utils/env.py +++ b/swift/utils/env.py @@ -84,6 +84,15 @@ def is_mp_ddp() -> bool: return False +def configure_vllm_allreduce_env(tensor_parallel_size: int) -> None: + if tensor_parallel_size <= 1 or 'VLLM_ALLREDUCE_USE_SYMM_MEM' in os.environ: + return + + os.environ['VLLM_ALLREDUCE_USE_SYMM_MEM'] = '0' + logger.info_once('Setting VLLM_ALLREDUCE_USE_SYMM_MEM=0 for vLLM tensor-parallel rollout. ' + 'Set the environment variable explicitly to override this stability default.') + + def is_pai_training_job() -> bool: return 'PAI_TRAINING_JOB_ID' in os.environ diff --git a/tests/utils/test_vllm_env.py b/tests/utils/test_vllm_env.py new file mode 100644 index 0000000000..5a12e7c8f8 --- /dev/null +++ b/tests/utils/test_vllm_env.py @@ -0,0 +1,27 @@ +import os + +from swift.utils.env import configure_vllm_allreduce_env + + +def test_configure_vllm_allreduce_env_sets_default_for_tensor_parallel(monkeypatch): + monkeypatch.delenv('VLLM_ALLREDUCE_USE_SYMM_MEM', raising=False) + + configure_vllm_allreduce_env(2) + + assert os.environ['VLLM_ALLREDUCE_USE_SYMM_MEM'] == '0' + + +def test_configure_vllm_allreduce_env_preserves_explicit_value(monkeypatch): + monkeypatch.setenv('VLLM_ALLREDUCE_USE_SYMM_MEM', '1') + + configure_vllm_allreduce_env(2) + + assert os.environ['VLLM_ALLREDUCE_USE_SYMM_MEM'] == '1' + + +def test_configure_vllm_allreduce_env_skips_single_tensor_parallel(monkeypatch): + monkeypatch.delenv('VLLM_ALLREDUCE_USE_SYMM_MEM', raising=False) + + configure_vllm_allreduce_env(1) + + assert 'VLLM_ALLREDUCE_USE_SYMM_MEM' not in os.environ From 9c842afa96545aea31c5411c250e6eed3713f26a Mon Sep 17 00:00:00 2001 From: yyswhsccc Date: Sun, 17 May 2026 13:03:45 -0600 Subject: [PATCH 2/3] Handle missing vLLM tensor parallel size --- swift/utils/env.py | 4 ++-- tests/utils/test_vllm_env.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/swift/utils/env.py b/swift/utils/env.py index e96b613202..fee2ae7a25 100644 --- a/swift/utils/env.py +++ b/swift/utils/env.py @@ -84,8 +84,8 @@ def is_mp_ddp() -> bool: return False -def configure_vllm_allreduce_env(tensor_parallel_size: int) -> None: - if tensor_parallel_size <= 1 or 'VLLM_ALLREDUCE_USE_SYMM_MEM' in os.environ: +def configure_vllm_allreduce_env(tensor_parallel_size: Optional[int]) -> None: + if not tensor_parallel_size or tensor_parallel_size <= 1 or 'VLLM_ALLREDUCE_USE_SYMM_MEM' in os.environ: return os.environ['VLLM_ALLREDUCE_USE_SYMM_MEM'] = '0' diff --git a/tests/utils/test_vllm_env.py b/tests/utils/test_vllm_env.py index 5a12e7c8f8..95f2f5e184 100644 --- a/tests/utils/test_vllm_env.py +++ b/tests/utils/test_vllm_env.py @@ -25,3 +25,11 @@ def test_configure_vllm_allreduce_env_skips_single_tensor_parallel(monkeypatch): configure_vllm_allreduce_env(1) assert 'VLLM_ALLREDUCE_USE_SYMM_MEM' not in os.environ + + +def test_configure_vllm_allreduce_env_skips_missing_tensor_parallel(monkeypatch): + monkeypatch.delenv('VLLM_ALLREDUCE_USE_SYMM_MEM', raising=False) + + configure_vllm_allreduce_env(None) + + assert 'VLLM_ALLREDUCE_USE_SYMM_MEM' not in os.environ From f927d48299df9452891abb38fd22471a2ae1c614 Mon Sep 17 00:00:00 2001 From: yyswhsccc Date: Sun, 17 May 2026 15:35:33 -0600 Subject: [PATCH 3/3] Cover vLLM TP all-reduce default in infer path --- swift/pipelines/infer/infer.py | 4 +++- swift/utils/env.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/swift/pipelines/infer/infer.py b/swift/pipelines/infer/infer.py index 90c1b8600c..b9ee3395bc 100644 --- a/swift/pipelines/infer/infer.py +++ b/swift/pipelines/infer/infer.py @@ -8,7 +8,8 @@ from swift.dataset import DatasetLoader, load_dataset, sample_dataset from swift.infer_engine import AdapterRequest, InferRequest, RequestConfig, TransformersEngine from swift.metrics import InferStats, MeanMetric, compute_rouge_bleu -from swift.utils import JsonlWriter, get_dist_setting, get_logger, is_dist, is_master, read_from_jsonl +from swift.utils import (JsonlWriter, configure_vllm_allreduce_env, get_dist_setting, get_logger, is_dist, is_master, + read_from_jsonl) from ..base import SwiftPipeline from ..export import merge_lora from ..utils import get_cached_dataset, prepare_model_template @@ -65,6 +66,7 @@ def get_infer_engine(args: InferArguments, template=None, **extra_kwargs): if hasattr(args, 'max_batch_size'): kwargs.update({'max_batch_size': args.max_batch_size}) elif infer_backend == 'vllm': + configure_vllm_allreduce_env(args.vllm_tensor_parallel_size) from swift.infer_engine import VllmEngine infer_engine_cls = VllmEngine kwargs.update(args.get_vllm_engine_kwargs()) diff --git a/swift/utils/env.py b/swift/utils/env.py index fee2ae7a25..2b99fe57cd 100644 --- a/swift/utils/env.py +++ b/swift/utils/env.py @@ -89,7 +89,7 @@ def configure_vllm_allreduce_env(tensor_parallel_size: Optional[int]) -> None: return os.environ['VLLM_ALLREDUCE_USE_SYMM_MEM'] = '0' - logger.info_once('Setting VLLM_ALLREDUCE_USE_SYMM_MEM=0 for vLLM tensor-parallel rollout. ' + logger.info_once('Setting VLLM_ALLREDUCE_USE_SYMM_MEM=0 for vLLM tensor-parallel execution. ' 'Set the environment variable explicitly to override this stability default.')