我现在使用 vLLM 在本地部署 Qwen2-VL-2B,另一台电脑用 requests 发送请求(IP 地址已隐藏)。我发送了两张照片,prompt 中也有两个 image_pad,为什么模型只回复了其中一张照片的答案?
vllm serve Qwen2-VL-2B --dtype auto --port 8000 --limit_mm_per_prompt image=4
import requests
import json
from PIL import Image
import base64
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 UTF-8 string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
# --- Build and send a two-image chat request to the vLLM OpenAI-compatible server ---
# NOTE(review): the server log shows BOTH <|image_pad|> placeholders reached the
# model, yet the reply describes only one picture. "Qwen2-VL-2B" is the *base*
# checkpoint — presumably the instruction-tuned "Qwen2-VL-2B-Instruct" is needed
# for reliable multi-image chat following — TODO confirm against the model card.

# 1. Endpoint of the server started via the `vllm serve` command line.
url = 'http://xxxx:8000/v1/chat/completions'

# 2. Payload: two local images embedded as base64 data URLs plus a text prompt.
image_path1 = "1.jpg"
image_path2 = "2.jpg"
base64_image1 = encode_image(image_path1)
base64_image2 = encode_image(image_path2)
data = {
    "model": "Qwen2-VL-2B",
    "messages": [
        {"role": "system",
         "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
        {"role": "user",
         "content": [
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image1}"}},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image2}"}},
             {"type": "text", "text": "描述一下这两张照片"},
         ]},
    ],
    "temperature": 0.7,
    "top_p": 0.8,
    "repetition_penalty": 1.05,
    "max_tokens": 4096,
}

# 3./4. Send the POST request. Passing the dict via `json=` lets requests
# serialize it and set the Content-Type header, replacing the manual
# json.dumps + headers construction.
response = requests.post(url, json=data)
response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error body

# 5. Print the full JSON response. To print only the assistant's answer text:
# print(response.json().get("choices", [])[0].get("message", {}).get("content", ""))
print(response.json())
{'id': 'chat-f4b3a5d73db64cab9b90c9e7dde73be0', 'object': 'chat.completion', 'created': 1728650995, 'model': 'Qwen2-VL-2B', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '这两张图片展示了一个可爱的卡通形象,背景是简单的白色。图片中有一个橙色的数字“2”,数字周围有一些小点和彩色的装饰物,如粉色的饮料杯和绿色的叶子。数字“2”下方还有一个蓝色的小怪兽,它看起来很开心,手里拿着一个黄色的瓶子,瓶子里装着一些白色的气泡。背景中还有一些其他的小装饰物,整体风格非常可爱和简洁。', 'tool_calls': []}, 'logprobs': None, 'finish_reason': 'stop', 'stop_reason': None}], 'usage': {'prompt_tokens': 1351, 'total_tokens': 1443, 'completion_tokens': 92}, 'prompt_logprobs': None}
注意:1.jpg 是一张数字 1 的图片,2.jpg 是一张数字 2 的图片。很显然,上面的回复只描述了 2.jpg 这一张图。
打印了一下vllm里面的prompt
Received request chat-f4b3a5d73db64cab9b90c9e7dde73be0: prompt: '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>\n<|vision_start|><|image_pad|><|vision_end|>\n描述一下这两张照片<|im_end|>\n<|im_start|>assistant\n', params: SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penarepetition_penalty=1.05, temperature=0.7, top_p=0.8, top_k=-1, min_p=0.0, seed=None, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=[], stop_token_ids=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4096, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None), prompt_token_ids: [151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 151652, 151655, 151653, 198, 151652, 151655, 151653, 198, 53481, 100158, 106143, 86341, 102184, 151645, 198, 151644, 77091, 198], lora_request: None, prompt_adapter_request: None.