@@ -489,10 +489,12 @@ def main():
     logger.info('Loading InternVLChatModel...')
     config = InternVLChatConfig.from_pretrained(model_args.model_name_or_path)
     config.vision_config.drop_path_rate = model_args.drop_path_rate
-    if 'internlm' in model_args.model_name_or_path.lower():
+    if config.llm_config.model_type == 'internlm2':
         config.llm_config.attn_implementation = 'flash_attention_2'  # for InternLM
+        logger.info('Using flash_attention_2 for InternLM')
     else:
         config.llm_config._attn_implementation = 'flash_attention_2'  # for LLaMA
+        logger.info('Using flash_attention_2 for LLaMA')
     config.template = data_args.conv_style
     config.select_layer = model_args.vision_select_layer
     config.dynamic_image_size = data_args.dynamic_image_size
@@ -510,10 +512,12 @@ def main():
         model_args.vision_path, torch_dtype=torch.bfloat16, config=vision_config)
     logger.info('Loading LLaMA...')
     llm_config = AutoConfig.from_pretrained(model_args.llm_path, trust_remote_code=True)
-    if 'internlm' in model_args.llm_path.lower():
+    if llm_config.model_type == 'internlm2':
         llm_config.attn_implementation = 'flash_attention_2'  # for InternLM
+        logger.info('Using flash_attention_2 for InternLM')
     else:
         llm_config._attn_implementation = 'flash_attention_2'  # for LLaMA
+        logger.info('Using flash_attention_2 for LLaMA')
     llm = AutoModelForCausalLM.from_pretrained(
         model_args.llm_path, torch_dtype=torch.bfloat16,
         config=llm_config, trust_remote_code=True)