forked from EngineX-Cambricon/enginex-mlu370-vllm
add qwen3
vllm-v0.6.2/examples/production_monitoring/dummy_client.py | 35 (new file)
@@ -0,0 +1,35 @@
+import requests
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
+    OTLPSpanExporter)
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import (BatchSpanProcessor,
+                                            ConsoleSpanExporter)
+from opentelemetry.trace import SpanKind, set_tracer_provider
+from opentelemetry.trace.propagation.tracecontext import (
+    TraceContextTextMapPropagator)
+
+trace_provider = TracerProvider()
+set_tracer_provider(trace_provider)
+
+trace_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter()))
+trace_provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
+
+tracer = trace_provider.get_tracer("dummy-client")
+
+url = "http://localhost:8000/v1/completions"
+with tracer.start_as_current_span("client-span", kind=SpanKind.CLIENT) as span:
+    prompt = "San Francisco is a"
+    span.set_attribute("prompt", prompt)
+    headers = {}
+    TraceContextTextMapPropagator().inject(headers)
+    payload = {
+        "model": "facebook/opt-125m",
+        "prompt": prompt,
+        "max_tokens": 10,
+        "best_of": 20,
+        "n": 3,
+        "use_beam_search": "true",
+        "temperature": 0.0,
+        # "stream": True,
+    }
+    response = requests.post(url, headers=headers, json=payload)
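Note (not part of the diff): the client above injects the W3C trace context into the request headers via TraceContextTextMapPropagator, so its client-span can be joined with spans emitted by the server. It assumes an OTLP collector listening on the OTLPSpanExporter default (gRPC, localhost:4317) and a vLLM OpenAI server started with tracing enabled. A minimal sketch of the receiving side, using only the OpenTelemetry API already imported in dummy_client.py; `incoming_headers` is a hypothetical stand-in for the HTTP headers of the incoming request, and the traceparent value is a placeholder:

# Sketch: re-attach the client's span context on the server side.
# `incoming_headers` is assumed; "traceparent" is the standard W3C carrier key.
from opentelemetry import trace
from opentelemetry.trace.propagation.tracecontext import (
    TraceContextTextMapPropagator)

incoming_headers = {"traceparent": "00-<trace-id>-<span-id>-01"}  # placeholder
ctx = TraceContextTextMapPropagator().extract(carrier=incoming_headers)
tracer = trace.get_tracer("dummy-server")
with tracer.start_as_current_span("server-span", context=ctx) as span:
    # Work recorded here appears as a child of the client-span above.
    pass

With both sides wired this way, the ConsoleSpanExporter prints the spans locally while the BatchSpanProcessor ships the same spans to the OTLP endpoint.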