Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions swift/megatron/arguments/megatron_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,10 +706,6 @@ def __post_init__(self):
if self.tuner_type == 'lora_llm':
if not self.is_multimodal:
raise ValueError('`tuner_type="lora_llm"` is only supported for multimodal models.')
if not self.merge_lora:
raise ValueError('`merge_lora` must be True when using `--tuner_type lora_llm`')
if not self.no_save_optim:
raise ValueError('`no_save_optim` must be True when using `--tuner_type lora_llm`')
if self.adapters or self.ref_adapters or self.mcore_adapter or self.mcore_ref_adapter:
if self.tuner_type == 'full':
self.tuner_type = 'lora'
Comment on lines 710 to 711
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For multimodal models, when adapters are provided but tuner_type is not specified (defaulting to 'full'), it might be more appropriate to set tuner_type to 'lora_llm' instead of 'lora'. This ensures that the specialized multimodal LoRA logic is used consistently for multimodal models.

Suggested change
if self.tuner_type == 'full':
self.tuner_type = 'lora'
if self.tuner_type == 'full':
self.tuner_type = 'lora_llm' if self.is_multimodal else 'lora'

Expand Down
6 changes: 3 additions & 3 deletions swift/megatron/trainers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def _prepare_peft_model(self, models):
if args.mcore_model is None:
self.bridge.load_weights(models, args.model_dir)
peft_models = [prepare_mcore_model(args, model) for model in models]
if args.tuner_type == 'lora' and args.adapters and args.mcore_adapter is None:
if args.tuner_type in {'lora', 'lora_llm'} and args.adapters and args.mcore_adapter is None:
assert len(args.adapters) == 1, 'Currently only support one adapter.'
self.bridge.load_weights(models, args.adapters[0], peft_format=True, adapter_name='default')
return peft_models
Expand Down Expand Up @@ -727,7 +727,7 @@ def save_checkpoint(self):
os.makedirs(output_dir, exist_ok=True)
args_path = os.path.join(os.path.dirname(output_dir), 'args.json')
self.copy_path(args_path, os.path.join(output_dir, 'args.json'))
save_peft_format = args.tuner_type == 'lora' and not args.merge_lora
save_peft_format = args.tuner_type in {'lora', 'lora_llm'} and not args.merge_lora
if args.save_safetensors and args.no_save_optim:
model = []
else:
Expand All @@ -739,7 +739,7 @@ def save_checkpoint(self):
self.optimizer,
self.opt_param_scheduler,
iteration=iteration,
peft_format=args.tuner_type == 'lora',
peft_format=args.tuner_type in {'lora', 'lora_llm'},
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Setting peft_format=True for lora_llm ensures that LoRA weights are saved, but it may exclude other trainable parameters in multimodal models. If freeze_vit or freeze_aligner are set to False, their weights will be filtered out of the Megatron checkpoint because they are not LoRA parameters and are not automatically added to modules_to_save. Consider ensuring that peft_format is only True when no other non-LoRA parameters are trainable, or that those modules are included in the filter.

output_dir=output_dir)
state.last_model_checkpoint = output_dir
if state.best_global_step is not None:
Expand Down
Loading