Compare commits
13 Commits
4d35251531
...
feat/ascen
| Author | SHA1 | Date | |
|---|---|---|---|
| a219bd4abd | |||
| 1fbc3c3465 | |||
| cdfce13fb6 | |||
| 074098df13 | |||
| 5d0b6f4e1f | |||
| 31c16831b1 | |||
| 33c57acb1b | |||
| cdefc1873e | |||
| be947dfc79 | |||
| fcf7f30797 | |||
| b9906fa791 | |||
| ce8d16c160 | |||
| b524f25741 |
@@ -1 +0,0 @@
|
|||||||
Dockerfile.bi100
|
|
||||||
5
Dockerfile
Normal file
5
Dockerfile
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
FROM git.modelhub.org.cn:9443/enginex-ascend/vllm-ascend:v0.10.0rc1
|
||||||
|
|
||||||
|
WORKDIR /workspace
|
||||||
|
RUN pip install diffusers==0.34.0
|
||||||
|
COPY main.py dataset.json /workspace/
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
FROM harbor-contest.4pd.io/zhangyiqun/public/pytorch:2.6.0-cuda12.4-cudnn9-devel
|
|
||||||
WORKDIR /workspace
|
|
||||||
# ENV PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
|
|
||||||
RUN pip install diffusers transformers sentencepiece -i https://nexus.4pd.io/repository/pypi-all/simple
|
|
||||||
COPY main.py test.sh dataset.json /workspace/
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
FROM git.modelhub.org.cn:980/enginex-iluvatar/bi100-3.2.1-x86-ubuntu20.04-py3.10-poc-llm-infer:v1.2.2
|
|
||||||
|
|
||||||
WORKDIR /workspace
|
|
||||||
ENV PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
|
|
||||||
RUN pip install diffusers==0.34.0
|
|
||||||
COPY main.py test.sh dataset.json /workspace/
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
FROM git.modelhub.org.cn:980/enginex-iluvatar/mr100_corex:4.3.0
|
|
||||||
|
|
||||||
WORKDIR /workspace
|
|
||||||
COPY whls-mrv100 /packages
|
|
||||||
RUN pip install diffusers==0.34.0 sentencepiece transformers==4.55.2
|
|
||||||
# RUN pip install /packages/*.whl
|
|
||||||
COPY main.py test.sh dataset.json /workspace/
|
|
||||||
55
README.md
55
README.md
@@ -1,28 +1,45 @@
|
|||||||
## Installation
|
|
||||||
参考Dockerfile,构建运行镜像
|
|
||||||
|
|
||||||
## Quickstart
|
## Quickstart
|
||||||
|
|
||||||
|
### 构建镜像
|
||||||
|
```bash
|
||||||
|
docker build -t diffusers:v0.1 .
|
||||||
|
```
|
||||||
|
|
||||||
|
### 模型下载
|
||||||
|
模型地址:https://modelscope.cn/models/AI-ModelScope/stable-diffusion-v1-5
|
||||||
|
下载后放到目录:`/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5`(如更改目录,请同步修改后面执行脚本中 `--model` 参数指定的模型路径)
|
||||||
|
|
||||||
### 测试程序
|
### 测试程序
|
||||||
1. 下载模型:https://modelscope.cn/models/AI-ModelScope/stable-diffusion-v1-5
|
1. 准备输入数据集,可以参考示例`dataset.json`
|
||||||
|
2. 在docker镜像里运行测试程序,会根据`dataset.json`内容,在`output`目录下生成图片文件。
|
||||||
2. 运行测试程序
|
|
||||||
|
|
||||||
修改测试程序`test.py`里面的模型路径,直接执行即可
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python3 test.py
|
./run_in_docker.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
### 批量测试程序
|
## 测试结果对比(A100 vs 昇腾910B)
|
||||||
1. 准备输入数据集`dataset.json`,可以参考示例`dataset.json`
|
|
||||||
2. 运行测试程序
|
|
||||||
```bash
|
|
||||||
python3 main.py --model "/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5" --json "dataset.json" --results "results.json" --outdir "output" --device cuda --dtype fp16
|
|
||||||
```
|
|
||||||
|
|
||||||
## 测试结果
|
### 数据集
|
||||||
| | A100 平均生成时间(秒) | 天垓100 平均生成时间(秒) |
|
数据集如下:
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
"A futuristic city skyline at sunset, with flying cars and neon lights, in cyberpunk style",
|
||||||
|
"A traditional Chinese courtyard covered in snow, with red lanterns glowing warmly",
|
||||||
|
"A portrait of a young woman in Renaissance style, oil painting with dramatic lighting",
|
||||||
|
"A cute baby panda playing with bamboo in a watercolor illustration style",
|
||||||
|
"A majestic dragon flying over mountains, depicted in traditional ink wash painting",
|
||||||
|
"A cozy reading corner with bookshelves, a cat on the sofa, and sunlight streaming through the window",
|
||||||
|
"A knight in silver armor riding a horse, fantasy concept art with epic background",
|
||||||
|
"A bowl of ramen with detailed toppings, hyper-realistic food photography style",
|
||||||
|
"An astronaut floating in space, reflected in the helmet visor is planet Earth",
|
||||||
|
"A serene lake surrounded by autumn trees, painted in impressionist style"
|
||||||
|
]
|
||||||
|
```
|
||||||
|
### 测试结果
|
||||||
|
|
||||||
|
| 模型 | A100 平均生成时间(秒) | 昇腾910B 平均生成时间(秒) |
|
||||||
|------|-------------------------|----------------------------|
|
|------|-------------------------|----------------------------|
|
||||||
| 时间 | 1.4289 | 7.5609 |
|
| AI-ModelScope/stable-diffusion-v1-5 | 1.6208 | 7.2901 |
|
||||||
|
| zhanghaohit/karlo-v1-alpha | 3.1624 | 5.2920 |
|
||||||
|
| AI-ModelScope/stable-diffusion-3.5-medium | 5.0231 | 8.9607 |
|
||||||
|
| stabilityai/stable-diffusion-3-medium-diffusers | 3.7976 | 23.5929 |
|
||||||
|
|
||||||
|
|||||||
47
main.py
47
main.py
@@ -54,8 +54,12 @@ def build_pipeline(model_path: str, device: str = "cuda", dtype=torch.float16):
|
|||||||
use_safetensors=True,
|
use_safetensors=True,
|
||||||
)
|
)
|
||||||
# 设备放置
|
# 设备放置
|
||||||
if device == "cuda" and torch.cuda.is_available():
|
if device == "cuda":
|
||||||
pipe.to("cuda")
|
if torch.cuda.is_available():
|
||||||
|
pipe.to("cuda")
|
||||||
|
elif torch.npu.is_available():
|
||||||
|
pipe.to("npu")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
pipe.enable_attention_slicing()
|
pipe.enable_attention_slicing()
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -85,30 +89,33 @@ def generate_one(pipe: DiffusionPipeline, cfg: dict, out_dir: Path, index: int):
|
|||||||
"""
|
"""
|
||||||
prompt = cfg["prompt"]
|
prompt = cfg["prompt"]
|
||||||
negative_prompt = cfg.get("negative_prompt", None)
|
negative_prompt = cfg.get("negative_prompt", None)
|
||||||
steps = int(cfg.get("num_inference_steps", 20))
|
steps = int(cfg.get("num_inference_steps", 0))
|
||||||
guidance = float(cfg.get("guidance_scale", 7.5))
|
guidance = float(cfg.get("guidance_scale", 0))
|
||||||
seed = cfg.get("seed", None)
|
seed = cfg.get("seed", None)
|
||||||
width = cfg.get("width", None)
|
width = cfg.get("width", None)
|
||||||
height = cfg.get("height", None)
|
height = cfg.get("height", None)
|
||||||
|
|
||||||
# 随机数生成器(与管线设备一致)
|
# 随机数生成器(与管线设备一致)
|
||||||
gen = None
|
# gen = None
|
||||||
try:
|
# try:
|
||||||
device_str = str(getattr(pipe, "device", "cuda" if torch.cuda.is_available() else "cpu"))
|
# device_str = str(getattr(pipe, "device", "cuda" if torch.cuda.is_available() else "npu" if torch.npu.is_available() else "cpu"))
|
||||||
except Exception:
|
# except Exception:
|
||||||
device_str = "cuda" if torch.cuda.is_available() else "cpu"
|
# device_str = "cuda" if torch.cuda.is_available() else "npu" if torch.npu.is_available() else "cpu"
|
||||||
if seed is not None:
|
# if seed is not None:
|
||||||
gen = torch.Generator(device=device_str).manual_seed(int(seed))
|
# gen = torch.Generator(device=device_str).manual_seed(int(seed))
|
||||||
|
|
||||||
call_kwargs = dict(
|
call_kwargs = dict(
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
negative_prompt=negative_prompt,
|
# generator=gen,
|
||||||
num_inference_steps=steps,
|
|
||||||
guidance_scale=guidance,
|
|
||||||
generator=gen,
|
|
||||||
)
|
)
|
||||||
if width is not None and height is not None:
|
if width is not None and height is not None:
|
||||||
call_kwargs.update({"width": int(width), "height": int(height)})
|
call_kwargs.update({"width": int(width), "height": int(height)})
|
||||||
|
if negative_prompt is not None:
|
||||||
|
call_kwargs.update({"negative_prompt": negative_prompt})
|
||||||
|
if guidance > 0:
|
||||||
|
call_kwargs.update({"guidance_scale": guidance})
|
||||||
|
if steps > 0:
|
||||||
|
call_kwargs.update({"num_inference_steps": steps})
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
images = pipe(**call_kwargs).images
|
images = pipe(**call_kwargs).images
|
||||||
@@ -145,6 +152,9 @@ def main():
|
|||||||
parser.add_argument("--outdir", required=True, help="图片输出目录")
|
parser.add_argument("--outdir", required=True, help="图片输出目录")
|
||||||
parser.add_argument("--device", default="cuda", choices=["cuda", "cpu"], help="推理设备")
|
parser.add_argument("--device", default="cuda", choices=["cuda", "cpu"], help="推理设备")
|
||||||
parser.add_argument("--dtype", default="fp16", choices=["fp16", "fp32"], help="推理精度")
|
parser.add_argument("--dtype", default="fp16", choices=["fp16", "fp32"], help="推理精度")
|
||||||
|
parser.add_argument("--negative_prompt", default=None, help="negative_prompt")
|
||||||
|
parser.add_argument("--num_inference_steps", default=0, help="num_inference_steps")
|
||||||
|
parser.add_argument("--guidance_scale", default=0, help="guidance_scale")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
model_path = args.model
|
model_path = args.model
|
||||||
@@ -166,6 +176,13 @@ def main():
|
|||||||
records = []
|
records = []
|
||||||
total_start = time.time()
|
total_start = time.time()
|
||||||
for i, cfg in enumerate(prompts, 1):
|
for i, cfg in enumerate(prompts, 1):
|
||||||
|
if args.negative_prompt:
|
||||||
|
cfg["negative_prompt"] = args.negative_prompt
|
||||||
|
if args.num_inference_steps:
|
||||||
|
cfg["num_inference_steps"] = args.num_inference_steps
|
||||||
|
if args.guidance_scale:
|
||||||
|
cfg["guidance_scale"] = args.guidance_scale
|
||||||
|
|
||||||
out_path, elapsed, detail = generate_one(pipe, cfg, out_dir, i)
|
out_path, elapsed, detail = generate_one(pipe, cfg, out_dir, i)
|
||||||
print(f"[{i}/{len(prompts)}] saved: {out_path.name} elapsed: {elapsed:.3f}s")
|
print(f"[{i}/{len(prompts)}] saved: {out_path.name} elapsed: {elapsed:.3f}s")
|
||||||
records.append(detail)
|
records.append(detail)
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
#! /usr/bin/env bash
|
#! /usr/bin/env bash
|
||||||
image=harbor-contest.4pd.io/zhanghao/diffusers:bi100-0.2
|
image=diffusers:v0.1
|
||||||
docker run -it -v /root/zhanghao:/workspace -v /mnt:/mnt --device=dev/iluvatar1:/dev/iluvatar0 $image bash
|
device=1
|
||||||
|
docker run -v `pwd`:/host -e ASCEND_VISIBLE_DEVICES=1 --device /dev/davinci$device:/dev/davinci0 --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc -v /mnt:/mnt -v /usr/local/dcmi:/usr/local/dcmi -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info -v /etc/ascend_install.info:/etc/ascend_install.info --privileged $image python3 main.py --model "/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5" --json "dataset.json" --results "results.json" --outdir "output" --device cuda --dtype fp16
|
||||||
|
|||||||
@@ -1,3 +0,0 @@
|
|||||||
#! /usr/bin/env bash
|
|
||||||
image=harbor-contest.4pd.io/zhanghao/diffusers:a100-0.2
|
|
||||||
docker run -it -v /home/zhanghao/workspace:/workspace -v /mnt:/mnt $image bash
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
#! /usr/bin/env bash
|
|
||||||
image=harbor-contest.4pd.io/zhanghao/diffusers:mrv100-0.2
|
|
||||||
docker run -it -v /root/zhanghao:/workspace -v /mnt:/mnt --device=dev/iluvatar0:/dev/iluvatar0 $image bash
|
|
||||||
3
test.py
3
test.py
@@ -5,7 +5,8 @@ import time
|
|||||||
model_path = "/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5"
|
model_path = "/mnt/contest_ceph/zhanghao/models/stable-diffusion-v1-5"
|
||||||
# model_path = "/mnt/contest_ceph/zhanghao/models/stable-diffusion-3.5-medium"
|
# model_path = "/mnt/contest_ceph/zhanghao/models/stable-diffusion-3.5-medium"
|
||||||
pipeline = DiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
|
pipeline = DiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
|
||||||
pipeline.to("cuda")
|
device = "cuda" if torch.cuda.is_available() else "npu" if torch.npu.is_available() else "cpu"
|
||||||
|
pipeline.to(device)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
image = pipeline("An image of a squirrel in Picasso style").images[0]
|
image = pipeline("An image of a squirrel in Picasso style").images[0]
|
||||||
end = time.time()
|
end = time.time()
|
||||||
|
|||||||
Reference in New Issue
Block a user