-
Notifications
You must be signed in to change notification settings - Fork 1.4k
[megatron] megatron lora_llm support no_save_optim #9269
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -195,7 +195,7 @@ def _prepare_peft_model(self, models): | |
| if args.mcore_model is None: | ||
| self.bridge.load_weights(models, args.model_dir) | ||
| peft_models = [prepare_mcore_model(args, model) for model in models] | ||
| if args.tuner_type == 'lora' and args.adapters and args.mcore_adapter is None: | ||
| if args.tuner_type in {'lora', 'lora_llm'} and args.adapters and args.mcore_adapter is None: | ||
| assert len(args.adapters) == 1, 'Currently only support one adapter.' | ||
| self.bridge.load_weights(models, args.adapters[0], peft_format=True, adapter_name='default') | ||
| return peft_models | ||
|
|
@@ -727,7 +727,7 @@ def save_checkpoint(self): | |
| os.makedirs(output_dir, exist_ok=True) | ||
| args_path = os.path.join(os.path.dirname(output_dir), 'args.json') | ||
| self.copy_path(args_path, os.path.join(output_dir, 'args.json')) | ||
| save_peft_format = args.tuner_type == 'lora' and not args.merge_lora | ||
| save_peft_format = args.tuner_type in {'lora', 'lora_llm'} and not args.merge_lora | ||
| if args.save_safetensors and args.no_save_optim: | ||
| model = [] | ||
| else: | ||
|
|
@@ -739,7 +739,7 @@ def save_checkpoint(self): | |
| self.optimizer, | ||
| self.opt_param_scheduler, | ||
| iteration=iteration, | ||
| peft_format=args.tuner_type == 'lora', | ||
| peft_format=args.tuner_type in {'lora', 'lora_llm'}, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Setting |
||
| output_dir=output_dir) | ||
| state.last_model_checkpoint = output_dir | ||
| if state.best_global_step is not None: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For multimodal models, when adapters are provided but
`tuner_type` is not specified (defaulting to `'full'`), it might be more appropriate to set `tuner_type` to `'lora_llm'` instead of `'lora'`. This ensures that the specialized multimodal LoRA logic is used consistently for multimodal models.