Skip to content

Commit 88fdc64

Browse files
committed
[update] add vlm internvl2. rename json name.
1 parent a399667 commit 88fdc64

7 files changed: 24 additions & 38 deletions

projects/llm_framework/main_llm/llama3.2-1B-prefill-ax630c_tokenizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ def do_POST(self):
122122

123123
tokenizer = Tokenizer_Http(args.model_id)
124124

125-
print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
126-
print(tokenizer.encode("hello world", args.content))
125+
# print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
126+
# print(tokenizer.encode("hello world", args.content))
127127

128128
host = (args.host, args.port) #设定地址与端口号,'localhost'等价于'127.0.0.1'
129129
print('http://%s:%s' % host)

projects/llm_framework/main_llm/mode_openbuddy-llama3.2-1B-ax630c.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"mode":"openbuddy-llama3.2-1b-ax630c",
2+
"mode":"openbuddy-llama3.2-1B-ax630c",
33
"type":"llm",
44
"capabilities":[
55
"text_generation",

projects/llm_framework/main_llm/openbuddy-llama3.2-1B-ax630c_tokenizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,8 @@ def do_POST(self):
123123

124124
tokenizer = Tokenizer_Http(args.model_id)
125125

126-
print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
127-
print(tokenizer.encode("hello world", args.content))
126+
# print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
127+
# print(tokenizer.encode("hello world", args.content))
128128

129129
host = (args.host, args.port) #设定地址与端口号,'localhost'等价于'127.0.0.1'
130130
print('http://%s:%s' % host)

projects/llm_framework/main_llm/qwen2.5-coder-0.5B-ax630c_tokenizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ def do_POST(self):
122122

123123
tokenizer = Tokenizer_Http(args.model_id)
124124

125-
print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
126-
print(tokenizer.encode("hello world", args.content))
125+
# print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
126+
# print(tokenizer.encode("hello world", args.content))
127127

128128
host = (args.host, args.port) #设定地址与端口号,'localhost'等价于'127.0.0.1'
129129
print('http://%s:%s' % host)

projects/llm_framework/main_vlm/SConstruct

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ static_file += [AFile('../static_lib/libopencv-4.6-aarch64-none/lib/libtegra_hal
4949
static_file += [AFile('../static_lib/libopencv-4.6-aarch64-none/lib/libzlib.a')]
5050
STATIC_LIB += static_file * 4
5151

52-
STATIC_FILES += [AFile('internvl2-1b-ax630c_tokenizer.py')]
52+
STATIC_FILES += [AFile('internvl2-1B-ax630c_tokenizer.py')]
5353
STATIC_FILES += Glob('mode_*.json')
5454

5555
env['COMPONENTS'].append({'target':'llm_vlm',

projects/llm_framework/main_vlm/internvl2-1b-ax630c_tokenizer.py renamed to projects/llm_framework/main_vlm/internvl2-1B-ax630c_tokenizer.py

Lines changed: 15 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,18 @@
66

77
class Tokenizer_Http:
88

9-
def __init__(self):
10-
11-
path = "internvl2_tokenizer"
9+
def __init__(self, model_id):
1210
self.tokenizer = AutoTokenizer.from_pretrained(
13-
path, trust_remote_code=True, use_fast=False
11+
model_id, trust_remote_code=True, use_fast=False
1412
)
1513

16-
def encode(self, content):
17-
prompt = f"<|im_start|>system\n你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。<|im_end|><|im_start|>user\n{content}<|im_end|><|im_start|>assistant\n"
14+
def encode(self, prompt, content):
15+
prompt = f"<|im_start|>system\n{content}<|im_end|><|im_start|>user\n{prompt}<|im_end|><|im_start|>assistant\n"
1816
input_ids = self.tokenizer.encode(prompt)
1917
return input_ids
2018

21-
def encode_vpm(self, content="Please describe the image shortly."):
22-
prompt = f"<|im_start|>system\n你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。<|im_end|><|im_start|>user\n<img><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT></img>\n{content}<|im_end|><|im_start|>assistant\n"
19+
def encode_vpm(self, prompt, content="Please describe the image shortly."):
20+
prompt = f"<|im_start|>system\n{content}<|im_end|><|im_start|>user\n<img><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT><IMG_CONTEXT></img>\n{prompt}<|im_end|><|im_start|>assistant\n"
2321
input_ids = self.tokenizer.encode(prompt)
2422
return input_ids
2523

@@ -42,26 +40,6 @@ def bos_token(self):
4240
def eos_token(self):
4341
return self.tokenizer.eos_token
4442

45-
46-
tokenizer = Tokenizer_Http()
47-
48-
print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
49-
token_ids = tokenizer.encode_vpm()
50-
# [151644, 8948, 198, 56568, 104625, 100633, 104455, 104800, 101101, 32022, 102022, 99602, 100013, 9370, 90286, 21287, 42140, 53772, 35243, 26288, 104949, 3837, 105205, 109641, 67916, 30698, 11, 54851, 46944, 115404, 42192, 99441, 100623, 48692, 100168, 110498, 1773, 151645, 151644, 872, 198,
51-
# 151646,
52-
# 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648, 151648,
53-
# 151647,
54-
# 198, 5501, 7512, 279, 2168, 19620, 13, 151645, 151644, 77091, 198]
55-
# 118
56-
print(token_ids)
57-
print(len(token_ids))
58-
token_ids = tokenizer.encode("hello world")
59-
# [151644, 8948, 198, 56568, 104625, 100633, 104455, 104800, 101101, 32022, 102022, 99602, 100013, 9370, 90286, 21287, 42140, 53772, 35243, 26288, 104949, 3837, 105205, 109641, 67916, 30698, 11, 54851, 46944, 115404, 42192, 99441, 100623, 48692, 100168, 110498, 1773, 151645, 151644, 872, 198, 14990, 1879, 151645, 151644, 77091, 198]
60-
# 47
61-
print(token_ids)
62-
print(len(token_ids))
63-
64-
6543
class Request(BaseHTTPRequestHandler):
6644
# 通过类继承,新定义类
6745
timeout = 5
@@ -117,7 +95,7 @@ def do_POST(self):
11795
if b_img_prompt:
11896
token_ids = tokenizer.encode_vpm(prompt)
11997
else:
120-
token_ids = tokenizer.encode(prompt)
98+
token_ids = tokenizer.encode(prompt, args.content)
12199
if token_ids is None:
122100
msg = json.dumps({"token_ids": -1})
123101
else:
@@ -144,8 +122,16 @@ def do_POST(self):
144122
args = argparse.ArgumentParser()
145123
args.add_argument("--host", type=str, default="localhost")
146124
args.add_argument("--port", type=int, default=8080)
125+
args.add_argument('--model_id', type=str, default='internvl2_tokenizer')
126+
args.add_argument('--content', type=str, default='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。')
147127
args = args.parse_args()
148128

129+
tokenizer = Tokenizer_Http(args.model_id)
130+
131+
132+
# print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
133+
# print(tokenizer.encode("hello world", args.content))
134+
149135
host = (args.host, args.port) # 设定地址与端口号,'localhost'等价于'127.0.0.1'
150136
print("http://%s:%s" % host)
151137
server = HTTPServer(host, Request) # 根据地址端口号和新定义的类,创建服务器实例

projects/llm_framework/main_vlm/mode_internvl2-1b-ax630c.json renamed to projects/llm_framework/main_vlm/mode_internvl2-1B-ax630c.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"mode":"internvl2-1b-ax630c",
2+
"mode":"internvl2-1B-ax630c",
33
"type":"vlm",
44
"capabilities":[
55
"text_generation",

0 commit comments

Comments (0)