modelscope · Jintao-Huang · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024
diff --git a/swift/llm/model/model/internlm.py b/swift/llm/model/model/internlm.py
@@ -91,6 +91,7 @@ def load_model(self):
         CLIPVisionTower.load_model = load_model
 
     model, tokenizer = get_model_tokenizer_with_flash_attn(model_dir, model_info, model_kwargs, load_model, **kwargs)
     if model is not None:
+        model.vit.vision_tower.gradient_checkpointing_enable()  # inside the guard: model may be None
         if version == 'v2' and use_flash_attn:
             # fix AttributeError: no attribute 'attention_dropout'

diff --git a/swift/llm/model/utils.py b/swift/llm/model/utils.py
@@ -246,26 +246,27 @@ def safe_snapshot_download(model_id_or_path: str,
     if not download_model:
         ignore_file_pattern += ['*.bin', '*.safetensors']
     hub = get_hub(use_hf)
-    model_id_or_path = model_id_or_path.split(':', 1)  # get sub_folder
-    if len(model_id_or_path) == 1:
-        model_id_or_path = [model_id_or_path[0], None]
-    model_id_or_path, sub_folder = model_id_or_path
     if model_id_or_path.startswith('~'):
         model_id_or_path = os.path.abspath(os.path.expanduser(model_id_or_path))
     with safe_ddp_context(hash_id=model_id_or_path):
         if os.path.exists(model_id_or_path):
             model_dir = model_id_or_path
+            sub_folder = None  # existing local path is used as-is; no ':' split
         else:
             if model_id_or_path.startswith('/'):  # startswith
                 raise ValueError(f"path: '{model_id_or_path}' not found")
+            model_id_or_path = model_id_or_path.split(':', 1)  # get sub_folder: 'model_id:sub_folder'
+            if len(model_id_or_path) == 1:
+                model_id_or_path = [model_id_or_path[0], None]
+            model_id_or_path, sub_folder = model_id_or_path
             model_dir = hub.download_model(model_id_or_path, revision, ignore_file_pattern, token=hub_token, **kwargs)
 
         logger.info(f'Loading the model using model_dir: {model_dir}')
 
     model_dir = os.path.abspath(os.path.expanduser(model_dir))
-    assert os.path.isdir(model_dir), f'model_dir: {model_dir}'
     if sub_folder:
         model_dir = os.path.join(model_dir, sub_folder)
+    assert os.path.isdir(model_dir), f'model_dir: {model_dir}'  # checked after sub_folder is joined
     return model_dir
 
 

diff --git a/swift/trainers/arguments.py b/swift/trainers/arguments.py
@@ -36,14 +36,14 @@ def _fix_gradient_checkpointing(self):
         if hasattr(torch.utils.checkpoint, '_old_checkpoint'):  # avoid double patching
             return
         # Consistent with the default behavior of transformers.
-        default_use_reentrant = (
+        use_reentrant_ = (
             self.gradient_checkpointing_kwargs.get('use_reentrant', True)
             if self.gradient_checkpointing_kwargs else True)
         _old_checkpoint = torch.utils.checkpoint.checkpoint
 
         @wraps(_old_checkpoint)
-        def _new_checkpoint(*args, use_reentrant=default_use_reentrant, **kwargs):
-            return _old_checkpoint(*args, use_reentrant=use_reentrant, **kwargs)
+        def _new_checkpoint(*args, use_reentrant=None, **kwargs):  # caller's use_reentrant is accepted but ignored
+            return _old_checkpoint(*args, use_reentrant=use_reentrant_, **kwargs)  # always the configured value
 
         torch.utils.checkpoint._old_checkpoint = _old_checkpoint
         torch.utils.checkpoint.checkpoint = _new_checkpoint