前言

看了下 Qwen3-Coder 接入 VS Code 的编写时间,一个月过去公司就下线了大模型 API,换成了需要额外安装的工具😓😓😓,一开始还是用 API 构造的模型比较,后面换成本地 Ollama 了,system_prompt 用的都是同一个。

1
2
3
4
5
6
7
8
9
10
11
12
13
system_prompt = """
Generate a concise image description for 3D model generation. DO NOT ask for additional information or engage in dialogue - work with what is provided.

CRITICAL RULES:
1. MAX 75 TOKENS (CLIP tokenizer)
2. ENGLISH ONLY
3. NO BACKGROUND (transparent/white)
4. Full body view from front
5. Start with "Generate an image of..."
6. Generate the image description in ENGLISH ONLY, regardless of the input language. Do not mix languages in your response.

Include: subject, key details, materials, lighting, perspective. Be specific but concise. SHOW THE COMPLETE SUBJECT - no cropping, no cutoff body parts. For animals, show full body from nose to tail. For characters, show from head to toe.
"""

OpenAI Compatible API

试了下 qwen3-32b、qwen3-coder-30b-a3b-instruct、qwen3-coder-480b-a35b-instruct-fp8 这三个模型,当时没记录测试结果,现在只能展示页面了…

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
from typing import Any
import asyncio
import time
import gradio as gr
import tiktoken

from agents import (
Agent,
Runner,
set_default_openai_api,
set_default_openai_client,
set_trace_processors,
)
from openai import AsyncOpenAI
from agents.tracing.processor_interface import TracingProcessor
from agents.tracing.spans import Span
from agents.tracing.traces import Trace


# define custom tracing processor;
# default is to call home to OpenAI reusing the provided
# model API key, which won't work with litellm proxy
class MyTracingProcessor(TracingProcessor):
    """Console-only tracing processor for the openai-agents SDK.

    The SDK's default processor uploads traces to OpenAI reusing the
    configured model API key, which fails behind a litellm proxy; this
    implementation simply prints every trace/span lifecycle event.
    """

    def on_trace_start(self, trace: Trace) -> None:
        print(f"on_trace_start({trace.export()})")

    def on_trace_end(self, trace: Trace) -> None:
        print(f"on_trace_end({trace.export()})")

    def on_span_start(self, span: Span[Any]) -> None:
        print(f"on_span_start({span.export()})")

    def on_span_end(self, span: Span[Any]) -> None:
        print(f"on_span_end({span.export()})")

    def shutdown(self) -> None:
        print("shutdown()")

    def force_flush(self) -> None:
        print("force_flush()")


# Create an OpenAI client pointed at the litellm proxy and plug it in,
# switch from the new Responses API to the more common Chat Completions
# API, and replace the home-calling tracing processor with a custom one.
# Endpoint and key are read from the environment so credentials are not
# hard-coded in source; the original literal placeholders remain the
# fallback values, so behavior is unchanged when the vars are unset.
import os

custom_client = AsyncOpenAI(
    base_url=os.environ.get("OPENAI_BASE_URL", "BASE_URL"),
    api_key=os.environ.get("OPENAI_API_KEY", "API_KEY"),
)
set_default_openai_client(custom_client)
set_default_openai_api("chat_completions")
set_trace_processors([MyTracingProcessor()])

# System prompt shared by every model under comparison: forces a short,
# English-only, CLIP-budget (<=75 token) image description with no
# background and a full-body front view, suitable for a text-to-image
# step feeding a 3D model generation pipeline.
system_prompt = """
Generate a concise image description for 3D model generation. DO NOT ask for additional information or engage in dialogue - work with what is provided.

CRITICAL RULES:
1. MAX 75 TOKENS (CLIP tokenizer)
2. ENGLISH ONLY
3. NO BACKGROUND (transparent/white)
4. Full body view from front
5. Start with "Generate an image of..."
6. Generate the image description in ENGLISH ONLY, regardless of the input language. Do not mix languages in your response.

Include: subject, key details, materials, lighting, perspective. Be specific but concise. SHOW THE COMPLETE SUBJECT - no cropping, no cutoff body parts. For animals, show full body from nose to tail. For characters, show from head to toe.
"""


async def generate_response(agent, prompt):
    """Run a single agent on *prompt* and time the call.

    Returns a ``(text, seconds, token_count)`` triple, where the token
    count uses the cl100k_base encoding as an approximation of the CLIP
    budget.  Any failure is folded into the text as ``"Error: ..."`` with
    a zero token count, so the caller always gets one row per model.
    """
    started = time.time()
    try:
        result = await Runner.run(agent, prompt)
        duration = time.time() - started
        # Measure the output length with the cl100k_base tokenizer.
        encoder = tiktoken.get_encoding("cl100k_base")
        return result.final_output, duration, len(encoder.encode(result.final_output))
    except Exception as exc:
        return f"Error: {str(exc)}", time.time() - started, 0


def generate_all_responses(user_input):
    """Query every model with *user_input* and collect results.

    Returns a flat tuple ``(responses..., timings..., token_counts...)``
    matching the order of the Gradio output components.

    Fix: the original called ``asyncio.run`` once per model inside the
    loop, building and tearing down an event loop each time and running
    the independent network requests strictly serially.  All models are
    now dispatched concurrently on a single event loop.
    """
    start_time = time.time()
    model_ids = [
        "qwen3-32b",
        "qwen3-coder-30b-a3b-instruct",
        "qwen3-coder-480b-a35b-instruct-fp8",
    ]
    responses = []
    timings = []
    token_counts = []

    async def _query(model_id):
        # Agent construction stays inside the coroutine so a failing
        # constructor affects only its own model, as before.
        agent = Agent(name="Assistant", instructions=system_prompt, model=model_id)
        return await generate_response(agent, user_input)

    async def _query_all():
        return await asyncio.gather(
            *(_query(m) for m in model_ids), return_exceptions=True
        )

    for model_id, outcome in zip(model_ids, asyncio.run(_query_all())):
        if isinstance(outcome, BaseException):
            responses.append(f"Error with model {model_id}: {str(outcome)}")
            timings.append("N/A")
            token_counts.append("N/A")
        else:
            response, elapsed_time, token_count = outcome
            # Strip leading and trailing whitespace from response
            responses.append(response.strip())
            timings.append(f"{elapsed_time:.2f} seconds")
            token_counts.append(f"{token_count} tokens")

    total_elapsed_time = time.time() - start_time

    # Log user prompt and total duration to console
    print(f"User Prompt: {user_input}")
    print(f"Total Processing Duration: {total_elapsed_time:.2f} seconds")

    # Return responses, timings, and token counts as one flat tuple
    return tuple(responses + timings + token_counts)


# Create the Gradio comparison UI: one input box, one submit button, and
# one (result, time, tokens) column per model.  The panels are built in a
# loop; output wiring order (results, then timings, then token counts)
# matches the tuple returned by generate_all_responses.
MODEL_PANELS = ["Qwen3-32B", "Qwen3-Coder-30B", "Qwen3-Coder-480B"]

with gr.Blocks(title="Image Description Generator for 3D Model") as demo:
    gr.Markdown("# Image Description Generator for 3D Model")
    gr.Markdown(
        "Enter your 3D model description below and get AI-generated image descriptions optimized for 3D generation pipelines. Note: Token counts are calculated using the CLIP tokenizer (used by Stable Diffusion). Other models may report slightly different counts due to different tokenization schemes."
    )

    with gr.Row():
        input_text = gr.Textbox(
            label="Describe your 3D model",
            placeholder="E.g., 'A futuristic cyberpunk drone with neon lights'",
            lines=3,
        )

    with gr.Row():
        submit_btn = gr.Button("Generate Descriptions", variant="primary")

    result_boxes, timing_boxes, token_boxes = [], [], []
    with gr.Row():
        for panel_title in MODEL_PANELS:
            with gr.Column():
                gr.Markdown(f"### {panel_title}")
                result_boxes.append(
                    gr.Textbox(label="Result", lines=5, interactive=False)
                )
                timing_boxes.append(
                    gr.Textbox(label="Generation Time", interactive=False)
                )
                token_boxes.append(
                    gr.Textbox(label="Token Count (CLIP)", interactive=False)
                )

    submit_btn.click(
        fn=generate_all_responses,
        inputs=input_text,
        outputs=result_boxes + timing_boxes + token_boxes,
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")

Ollama

本地拉了一些模型:llama3.1:8b、llama3.2:3b、gemma3:4b、qwen2:7b、qwen2.5:7b、qwen3:8b、deepseek-r1:8b。有时生成的结果不符合要求,token 过长在生成图片时会被截断。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import gradio as gr
import ollama
import time
import tiktoken

# Shared system prompt (identical to the API-based comparison script):
# constrains output to a short English-only CLIP-budget image description
# with no background and a full-body front view for 3D generation.
system_prompt = """
Generate a concise image description for 3D model generation. DO NOT ask for additional information or engage in dialogue - work with what is provided.

CRITICAL RULES:
1. MAX 75 TOKENS (CLIP tokenizer)
2. ENGLISH ONLY
3. NO BACKGROUND (transparent/white)
4. Full body view from front
5. Start with "Generate an image of..."
6. Generate the image description in ENGLISH ONLY, regardless of the input language. Do not mix languages in your response.

Include: subject, key details, materials, lighting, perspective. Be specific but concise. SHOW THE COMPLETE SUBJECT - no cropping, no cutoff body parts. For animals, show full body from nose to tail. For characters, show from head to toe.
"""


def generate_all_responses(user_input):
    """Query each local Ollama model with *user_input*.

    Returns a flat tuple ``(responses..., timings..., token_counts...)``
    in the order the Gradio output components are wired.

    Fixes: the response was read with subscript access but the timing
    with attribute access; and ``total_duration`` is optional in the
    Ollama response — when missing/None the original f-string division
    raised, mislabeling a successful generation as an error.  A measured
    wall-clock time is now the fallback.
    """
    start_time = time.time()
    model_ids = [
        "llama3.1:8b",
        "llama3.2:3b",
        "gemma3:4b",
        "qwen2:7b",
        "qwen2.5:7b",
        "qwen3:8b",
        "deepseek-r1:8b",
    ]
    responses = []
    timings = []
    token_counts = []
    # cl100k_base approximates the CLIP token budget; build the encoder once.
    encoder = tiktoken.get_encoding("cl100k_base")

    messages = [
        {
            "role": "system",
            "content": system_prompt,
        },
        {
            "role": "user",
            "content": user_input,
        },
    ]

    for model_id in model_ids:
        model_start = time.time()
        try:
            # think=False suppresses the reasoning phase on thinking models.
            response = ollama.chat(model=model_id, messages=messages, think=False)
            final_output = response["message"]["content"]
            responses.append(final_output.strip())
            # total_duration is reported in nanoseconds; fall back to the
            # locally measured wall-clock time when it is not provided.
            total_ns = response.get("total_duration")
            seconds = (
                total_ns / 1_000_000_000 if total_ns else time.time() - model_start
            )
            timings.append(f"{seconds:.2f} seconds")
            token_count = len(encoder.encode(final_output))
            token_counts.append(f"{token_count} tokens")
            print(f"{model_id}:{final_output.strip()}")
        except Exception as e:
            responses.append(f"Error with model {model_id}: {str(e)}")
            timings.append("N/A")
            token_counts.append("N/A")

    total_elapsed_time = time.time() - start_time

    # Log user prompt and total duration to console
    print(f"User Prompt: {user_input}")
    print(f"Total Processing Duration: {total_elapsed_time:.2f} seconds")

    # Return responses, timings, and token counts as one flat tuple
    return tuple(responses + timings + token_counts)


# Create the Gradio comparison UI.  The seven model panels are laid out
# in three columns; each panel carries the index of its slot in the flat
# tuple returned by generate_all_responses, so the output wiring below
# (all results, then all timings, then all token counts, each in index
# order) reproduces the original component order exactly.
PANEL_COLUMNS = [
    [(1, "llama3.1:8b"), (4, "qwen2:7b"), (7, "deepseek-r1:8b")],
    [(2, "llama3.2:3b"), (5, "qwen2.5:7b")],
    [(3, "gemma3:4b"), (6, "qwen3:8b")],
]

with gr.Blocks(title="Image Description Generator for 3D Model") as demo:
    gr.Markdown("# Image Description Generator for 3D Model")
    gr.Markdown(
        "Enter your 3D model description below and get AI-generated image descriptions optimized for 3D generation pipelines. Note: Token counts are calculated using the CLIP tokenizer (used by Stable Diffusion). Other models may report slightly different counts due to different tokenization schemes."
    )

    with gr.Row():
        input_text = gr.Textbox(
            label="Describe your 3D model",
            placeholder="E.g., 'A futuristic cyberpunk drone with neon lights'",
            lines=3,
        )

    with gr.Row():
        submit_btn = gr.Button("Generate Descriptions", variant="primary")

    result_boxes, timing_boxes, token_boxes = {}, {}, {}
    with gr.Row():
        for column in PANEL_COLUMNS:
            with gr.Column():
                for slot, model_name in column:
                    gr.Markdown(f"### {model_name}")
                    result_boxes[slot] = gr.Textbox(
                        label="Result", lines=5, interactive=False
                    )
                    timing_boxes[slot] = gr.Textbox(
                        label="Generation Time", interactive=False
                    )
                    token_boxes[slot] = gr.Textbox(
                        label="Token Count (CLIP)", interactive=False
                    )

    slot_order = sorted(result_boxes)
    submit_btn.click(
        fn=generate_all_responses,
        inputs=input_text,
        outputs=[result_boxes[i] for i in slot_order]
        + [timing_boxes[i] for i in slot_order]
        + [token_boxes[i] for i in slot_order],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")
  1. 一只黄色的小狗

  2. 一只蓝色的小鸟

参阅