[main] remove dbo code (#3712)

### What this PR does / why we need it? Remove the DBO (dual-batch overlap) code. vLLM now supports DBO upstream via PR https://github.com/vllm-project/vllm/pull/23693. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: 17c540a993 Signed-off-by: zzzzwwjj <1183291235@qq.com>
2025-10-25 15:53:01 +08:00
parent d9cdc65854
commit e5676fc36e
26 changed files with 69 additions and 1588 deletions
--- a/examples/offline_dualbatch_overlap_npu.py
+++ b/examples/offline_dualbatch_overlap_npu.py
@@ -1,52 +0,0 @@
-import os
-import time
-
-from vllm import LLM, SamplingParams
-
-os.environ["VLLM_USE_MODELSCOPE"] = "True"
-os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
-# enable dual-batch overlap for vllm ascend
-os.environ["VLLM_ASCEND_ENABLE_DBO"] = "1"
-
-# Sample prompts.
-prompts = ["The president of the United States is"] * 41
-# Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
-
-
-def main():
-    # Create an LLM.
-    llm = LLM(model="deepseek-ai/DeepSeek-V3-Lite-base-latest-w8a8-dynamic",
-              enforce_eager=True,
-              tensor_parallel_size=2,
-              max_model_len=4096,
-              trust_remote_code=True,
-              enable_expert_parallel=True,
-              additional_config={
-                  "torchair_graph_config": {
-                      "enabled": False
-                  },
-                  "ascend_scheduler_config": {
-                      "enabled": True
-                  },
-              })
-
-    # Generate texts from the prompts. The output is a list of RequestOutput
-    # objects that contain the prompt, generated text, and other information.
-    outputs = llm.generate(prompts, sampling_params)
-
-    # Print the outputs.
-    print("-" * 50)
-    for output in outputs:
-        prompt = output.prompt
-        generated_text = output.outputs[0].text
-        print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}")
-        print("-" * 50)
-
-    # Add a buffer to wait for profiler in the background process
-    # (in case MP is on) to finish writing profiling output.
-    time.sleep(10)
-
-
-if __name__ == "__main__":
-    main()