Compare commits

...

10 Commits

Author SHA1 Message Date
074098df13 update result for ascend 2025-09-04 09:53:55 +08:00
5d0b6f4e1f ascend 2025-09-04 09:53:23 +08:00
root
31c16831b1 add ascend support 2025-09-03 10:16:24 +08:00
33c57acb1b add input config 2025-08-28 12:49:39 +08:00
cdefc1873e fix diffusers version 2025-08-28 11:17:44 +08:00
be947dfc79 update readme 2025-08-26 18:13:06 +08:00
fcf7f30797 update base image of a100 2025-08-26 17:31:39 +08:00
b9906fa791 change base image for mlu370 2025-08-26 17:25:25 +08:00
root
ce8d16c160 update run_in_docker_mlu370 scripts 2025-08-26 15:35:32 +08:00
b524f25741 support mlu370 2025-08-25 17:07:12 +08:00
11 changed files with 57 additions and 63 deletions

View File

@@ -1 +0,0 @@
Dockerfile.bi100

5
Dockerfile Normal file
View File

@@ -0,0 +1,5 @@
FROM quay.io/ascend/vllm-ascend:v0.10.0rc1
WORKDIR /workspace
RUN pip install diffusers==0.34.0
COPY main.py dataset.json /workspace/

View File

@@ -1,5 +0,0 @@
FROM harbor-contest.4pd.io/zhangyiqun/public/pytorch:2.6.0-cuda12.4-cudnn9-devel
WORKDIR /workspace
# ENV PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
RUN pip install diffusers transformers sentencepiece -i https://nexus.4pd.io/repository/pypi-all/simple
COPY main.py test.sh dataset.json /workspace/

View File

@@ -1,6 +0,0 @@
FROM git.modelhub.org.cn:980/enginex-iluvatar/bi100-3.2.1-x86-ubuntu20.04-py3.10-poc-llm-infer:v1.2.2
WORKDIR /workspace
ENV PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
RUN pip install diffusers==0.34.0
COPY main.py test.sh dataset.json /workspace/

View File

@@ -1,7 +0,0 @@
FROM git.modelhub.org.cn:980/enginex-iluvatar/mr100_corex:4.3.0
WORKDIR /workspace
COPY whls-mrv100 /packages
RUN pip install diffusers==0.34.0 sentencepiece transformers==4.55.2
# RUN pip install /packages/*.whl
COPY main.py test.sh dataset.json /workspace/

View File

@@ -1,28 +1,25 @@
## Installation
参考Dockerfile构建运行镜像
## Quickstart
### 测试程序
1. 下载模型:https://modelscope.cn/models/AI-ModelScope/stable-diffusion-v1-5
2. 运行测试程序
修改测试程序`test.py`里面的模型路径,直接执行即可
### 构建镜像
```bash
python3 test.py
docker build -t diffusers:v0.1 .
```
### 批量测试程序
1. 准备输入数据集`dataset.json`,可以参考示例`dataset.json`
2. 运行测试程序
### 模型下载
模型地址:https://modelscope.cn/models/AI-ModelScope/stable-diffusion-v1-5
并放到目录:`/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5`(如更改目录,请修改后面的执行脚本中的模型路径)
### 测试程序
1. 准备输入数据集,可以参考示例`dataset.json`
2. 在docker镜像里运行测试程序,会根据`dataset.json`内容,在`output`目录下生成图片文件。
```bash
python3 main.py --model "/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5" --json "dataset.json" --results "results.json" --outdir "output" --device cuda --dtype fp16
./run_in_docker.sh
```
## 测试结果
| | A100 平均生成时间(秒) | 天垓100 平均生成时间(秒) |
|------|-------------------------|----------------------------|
| 时间 | 1.4289 | 7.5609 |
### stable-diffusion-v1-5模型
| | A100 平均生成时间(秒) | 昇腾910B 平均生成时间(秒) |
|------|-------------------------|----------------------------|
| 时间 | 1.6208 | 7.2901 |

47
main.py
View File

@@ -54,8 +54,12 @@ def build_pipeline(model_path: str, device: str = "cuda", dtype=torch.float16):
use_safetensors=True,
)
# 设备放置
if device == "cuda" and torch.cuda.is_available():
pipe.to("cuda")
if device == "cuda":
if torch.cuda.is_available():
pipe.to("cuda")
elif torch.npu.is_available():
pipe.to("npu")
try:
pipe.enable_attention_slicing()
except Exception:
@@ -85,30 +89,33 @@ def generate_one(pipe: DiffusionPipeline, cfg: dict, out_dir: Path, index: int):
"""
prompt = cfg["prompt"]
negative_prompt = cfg.get("negative_prompt", None)
steps = int(cfg.get("num_inference_steps", 20))
guidance = float(cfg.get("guidance_scale", 7.5))
steps = int(cfg.get("num_inference_steps", 0))
guidance = float(cfg.get("guidance_scale", 0))
seed = cfg.get("seed", None)
width = cfg.get("width", None)
height = cfg.get("height", None)
# 随机数生成器(与管线设备一致)
gen = None
try:
device_str = str(getattr(pipe, "device", "cuda" if torch.cuda.is_available() else "cpu"))
except Exception:
device_str = "cuda" if torch.cuda.is_available() else "cpu"
if seed is not None:
gen = torch.Generator(device=device_str).manual_seed(int(seed))
# gen = None
# try:
# device_str = str(getattr(pipe, "device", "cuda" if torch.cuda.is_available() else "npu" if torch.npu.is_available() else "cpu"))
# except Exception:
# device_str = "cuda" if torch.cuda.is_available() else "npu" if torch.npu.is_available() else "cpu"
# if seed is not None:
# gen = torch.Generator(device=device_str).manual_seed(int(seed))
call_kwargs = dict(
prompt=prompt,
negative_prompt=negative_prompt,
num_inference_steps=steps,
guidance_scale=guidance,
generator=gen,
# generator=gen,
)
if width is not None and height is not None:
call_kwargs.update({"width": int(width), "height": int(height)})
if negative_prompt is not None:
call_kwargs.update({"negative_prompt": negative_prompt})
if guidance > 0:
call_kwargs.update({"guidance_scale": guidance})
if steps > 0:
call_kwargs.update({"num_inference_steps": steps})
start = time.time()
images = pipe(**call_kwargs).images
@@ -145,6 +152,9 @@ def main():
parser.add_argument("--outdir", required=True, help="图片输出目录")
parser.add_argument("--device", default="cuda", choices=["cuda", "cpu"], help="推理设备")
parser.add_argument("--dtype", default="fp16", choices=["fp16", "fp32"], help="推理精度")
parser.add_argument("--negative_prompt", default=None, help="negative_prompt")
parser.add_argument("--num_inference_steps", default=0, help="num_inference_steps")
parser.add_argument("--guidance_scale", default=0, help="guidance_scale")
args = parser.parse_args()
model_path = args.model
@@ -166,6 +176,13 @@ def main():
records = []
total_start = time.time()
for i, cfg in enumerate(prompts, 1):
if args.negative_prompt:
cfg["negative_prompt"] = args.negative_prompt
if args.num_inference_steps:
cfg["num_inference_steps"] = args.num_inference_steps
if args.guidance_scale:
cfg["guidance_scale"] = args.guidance_scale
out_path, elapsed, detail = generate_one(pipe, cfg, out_dir, i)
print(f"[{i}/{len(prompts)}] saved: {out_path.name} elapsed: {elapsed:.3f}s")
records.append(detail)

View File

@@ -1,3 +1,3 @@
#! /usr/bin/env bash
image=harbor-contest.4pd.io/zhanghao/diffusers:bi100-0.2
docker run -it -v /root/zhanghao:/workspace -v /mnt:/mnt --device=dev/iluvatar1:/dev/iluvatar0 $image bash
image=diffusers:v0.1
docker run -v /mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5:/workspace/stable-diffusion-v1-5 --device=/dev/iluvatar1:/dev/iluvatar0 $image python3 main.py --model "./stable-diffusion-v1-5" --json "dataset.json" --results "results.json" --outdir "output" --device cuda --dtype fp16

View File

@@ -1,3 +0,0 @@
#! /usr/bin/env bash
image=harbor-contest.4pd.io/zhanghao/diffusers:a100-0.2
docker run -it -v /home/zhanghao/workspace:/workspace -v /mnt:/mnt $image bash

View File

@@ -1,3 +0,0 @@
#! /usr/bin/env bash
image=harbor-contest.4pd.io/zhanghao/diffusers:mrv100-0.2
docker run -it -v /root/zhanghao:/workspace -v /mnt:/mnt --device=dev/iluvatar0:/dev/iluvatar0 $image bash

View File

@@ -5,9 +5,10 @@ import time
model_path = "/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5"
# model_path = "/mnt/contest_ceph/zhanghao/models/stable-diffusion-3.5-medium"
pipeline = DiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
pipeline.to("cuda")
device = "cuda" if torch.cuda.is_available() else "npu" if torch.npu.is_available() else "cpu"
pipeline.to(device)
start = time.time()
image = pipeline("An image of a squirrel in Picasso style").images[0]
end = time.time()
print(f"elapsed: {end - start}")
image.save("squirrel_picasso.png")
image.save("squirrel_picasso.png")

View File

@@ -1 +0,0 @@
python3 main.py --model "/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5" --json "dataset.json" --results "results.json" --outdir "output" --device cuda --dtype fp16