Compare commits

...

10 Commits

Author SHA1 Message Date
074098df13 update result for ascend 2025-09-04 09:53:55 +08:00
5d0b6f4e1f ascend 2025-09-04 09:53:23 +08:00
root
31c16831b1 add ascend support 2025-09-03 10:16:24 +08:00
33c57acb1b add input config 2025-08-28 12:49:39 +08:00
cdefc1873e fix diffusers version 2025-08-28 11:17:44 +08:00
be947dfc79 update readme 2025-08-26 18:13:06 +08:00
fcf7f30797 update base image of a100 2025-08-26 17:31:39 +08:00
b9906fa791 change base image for mlu370 2025-08-26 17:25:25 +08:00
root
ce8d16c160 update run_in_docker_mlu370 scripts 2025-08-26 15:35:32 +08:00
b524f25741 support mlu370 2025-08-25 17:07:12 +08:00
11 changed files with 57 additions and 63 deletions

View File

@@ -1 +0,0 @@
Dockerfile.bi100

5
Dockerfile Normal file
View File

@@ -0,0 +1,5 @@
FROM quay.io/ascend/vllm-ascend:v0.10.0rc1
WORKDIR /workspace
RUN pip install diffusers==0.34.0
COPY main.py dataset.json /workspace/

View File

@@ -1,5 +0,0 @@
FROM harbor-contest.4pd.io/zhangyiqun/public/pytorch:2.6.0-cuda12.4-cudnn9-devel
WORKDIR /workspace
# ENV PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
RUN pip install diffusers transformers sentencepiece -i https://nexus.4pd.io/repository/pypi-all/simple
COPY main.py test.sh dataset.json /workspace/

View File

@@ -1,6 +0,0 @@
FROM git.modelhub.org.cn:980/enginex-iluvatar/bi100-3.2.1-x86-ubuntu20.04-py3.10-poc-llm-infer:v1.2.2
WORKDIR /workspace
ENV PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
RUN pip install diffusers==0.34.0
COPY main.py test.sh dataset.json /workspace/

View File

@@ -1,7 +0,0 @@
FROM git.modelhub.org.cn:980/enginex-iluvatar/mr100_corex:4.3.0
WORKDIR /workspace
COPY whls-mrv100 /packages
RUN pip install diffusers==0.34.0 sentencepiece transformers==4.55.2
# RUN pip install /packages/*.whl
COPY main.py test.sh dataset.json /workspace/

View File

@@ -1,28 +1,25 @@
## Installation
参考Dockerfile构建运行镜像
## Quickstart
### 测试程序
1. 下载模型:https://modelscope.cn/models/AI-ModelScope/stable-diffusion-v1-5
2. 运行测试程序
修改测试程序`test.py`里面的模型路径,直接执行即可
### 构建镜像
```bash
python3 test.py
docker build -t diffusers:v0.1 .
```
### 批量测试程序
1. 准备输入数据集`dataset.json`,可以参考示例`dataset.json`
2. 运行测试程序
### 模型下载
模型地址:https://modelscope.cn/models/AI-ModelScope/stable-diffusion-v1-5
并放到目录:`/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5`(如更改目录,请修改后面的执行脚本中的模型路径)
### 测试程序
1. 准备输入数据集,可以参考示例`dataset.json`
2. 在docker镜像里运行测试程序,会根据`dataset.json`内容,在`output`目录下生成图片文件。
```bash
python3 main.py --model "/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5" --json "dataset.json" --results "results.json" --outdir "output" --device cuda --dtype fp16
./run_in_docker.sh
```
## 测试结果
| | A100 平均生成时间(秒) | 天垓100 平均生成时间(秒) |
|------|-------------------------|----------------------------|
| 时间 | 1.4289 | 7.5609 |
### stable-diffusion-v1-5模型
| | A100 平均生成时间(秒) | 昇腾910B 平均生成时间(秒) |
|------|-------------------------|----------------------------|
| 时间 | 1.6208 | 7.2901 |

47
main.py
View File

@@ -54,8 +54,12 @@ def build_pipeline(model_path: str, device: str = "cuda", dtype=torch.float16):
use_safetensors=True,
)
# 设备放置
if device == "cuda" and torch.cuda.is_available():
pipe.to("cuda")
if device == "cuda":
if torch.cuda.is_available():
pipe.to("cuda")
elif torch.npu.is_available():
pipe.to("npu")
try:
pipe.enable_attention_slicing()
except Exception:
@@ -85,30 +89,33 @@ def generate_one(pipe: DiffusionPipeline, cfg: dict, out_dir: Path, index: int):
"""
prompt = cfg["prompt"]
negative_prompt = cfg.get("negative_prompt", None)
steps = int(cfg.get("num_inference_steps", 20))
guidance = float(cfg.get("guidance_scale", 7.5))
steps = int(cfg.get("num_inference_steps", 0))
guidance = float(cfg.get("guidance_scale", 0))
seed = cfg.get("seed", None)
width = cfg.get("width", None)
height = cfg.get("height", None)
# 随机数生成器(与管线设备一致)
gen = None
try:
device_str = str(getattr(pipe, "device", "cuda" if torch.cuda.is_available() else "cpu"))
except Exception:
device_str = "cuda" if torch.cuda.is_available() else "cpu"
if seed is not None:
gen = torch.Generator(device=device_str).manual_seed(int(seed))
# gen = None
# try:
# device_str = str(getattr(pipe, "device", "cuda" if torch.cuda.is_available() else "npu" if torch.npu.is_available() else "cpu"))
# except Exception:
# device_str = "cuda" if torch.cuda.is_available() else "npu" if torch.npu.is_available() else "cpu"
# if seed is not None:
# gen = torch.Generator(device=device_str).manual_seed(int(seed))
call_kwargs = dict(
prompt=prompt,
negative_prompt=negative_prompt,
num_inference_steps=steps,
guidance_scale=guidance,
generator=gen,
# generator=gen,
)
if width is not None and height is not None:
call_kwargs.update({"width": int(width), "height": int(height)})
if negative_prompt is not None:
call_kwargs.update({"negative_prompt": negative_prompt})
if guidance > 0:
call_kwargs.update({"guidance_scale": guidance})
if steps > 0:
call_kwargs.update({"num_inference_steps": steps})
start = time.time()
images = pipe(**call_kwargs).images
@@ -145,6 +152,9 @@ def main():
parser.add_argument("--outdir", required=True, help="图片输出目录")
parser.add_argument("--device", default="cuda", choices=["cuda", "cpu"], help="推理设备")
parser.add_argument("--dtype", default="fp16", choices=["fp16", "fp32"], help="推理精度")
parser.add_argument("--negative_prompt", default=None, help="negative_prompt")
parser.add_argument("--num_inference_steps", default=0, help="num_inference_steps")
parser.add_argument("--guidance_scale", default=0, help="guidance_scale")
args = parser.parse_args()
model_path = args.model
@@ -166,6 +176,13 @@ def main():
records = []
total_start = time.time()
for i, cfg in enumerate(prompts, 1):
if args.negative_prompt:
cfg["negative_prompt"] = args.negative_prompt
if args.num_inference_steps:
cfg["num_inference_steps"] = args.num_inference_steps
if args.guidance_scale:
cfg["guidance_scale"] = args.guidance_scale
out_path, elapsed, detail = generate_one(pipe, cfg, out_dir, i)
print(f"[{i}/{len(prompts)}] saved: {out_path.name} elapsed: {elapsed:.3f}s")
records.append(detail)

View File

@@ -1,3 +1,3 @@
#! /usr/bin/env bash
image=harbor-contest.4pd.io/zhanghao/diffusers:bi100-0.2
docker run -it -v /root/zhanghao:/workspace -v /mnt:/mnt --device=dev/iluvatar1:/dev/iluvatar0 $image bash
image=diffusers:v0.1
docker run -v /mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5:/workspace/stable-diffusion-v1-5 --device=/dev/iluvatar1:/dev/iluvatar0 $image python3 main.py --model "./stable-diffusion-v1-5" --json "dataset.json" --results "results.json" --outdir "output" --device cuda --dtype fp16

View File

@@ -1,3 +0,0 @@
#! /usr/bin/env bash
image=harbor-contest.4pd.io/zhanghao/diffusers:a100-0.2
docker run -it -v /home/zhanghao/workspace:/workspace -v /mnt:/mnt $image bash

View File

@@ -1,3 +0,0 @@
#! /usr/bin/env bash
image=harbor-contest.4pd.io/zhanghao/diffusers:mrv100-0.2
docker run -it -v /root/zhanghao:/workspace -v /mnt:/mnt --device=dev/iluvatar0:/dev/iluvatar0 $image bash

View File

@@ -5,9 +5,10 @@ import time
model_path = "/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5"
# model_path = "/mnt/contest_ceph/zhanghao/models/stable-diffusion-3.5-medium"
pipeline = DiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
pipeline.to("cuda")
device = "cuda" if torch.cuda.is_available() else "npu" if torch.npu.is_available() else "cpu"
pipeline.to(device)
start = time.time()
image = pipeline("An image of a squirrel in Picasso style").images[0]
end = time.time()
print(f"elapsed: {end - start}")
image.save("squirrel_picasso.png")
image.save("squirrel_picasso.png")

View File

@@ -1 +0,0 @@
python3 main.py --model "/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5" --json "dataset.json" --results "results.json" --outdir "output" --device cuda --dtype fp16