From f0425d03b41c63194ff9ddaaf09761fb9b9832d2 Mon Sep 17 00:00:00 2001
From: ZHANG Hao
Date: Wed, 3 Sep 2025 10:43:22 +0800
Subject: [PATCH] bge embedding support

---
 Dockerfile.ascend       |   6 +++
 dataset.json            |   1 +
 main.py                 | 130 ++++++++++++++++++++++++++++++++++++++++
 run_in_docker_ascend.sh |  20 +++++
 4 files changed, 157 insertions(+)
 create mode 100644 Dockerfile.ascend
 create mode 100644 dataset.json
 create mode 100644 main.py
 create mode 100755 run_in_docker_ascend.sh

diff --git a/Dockerfile.ascend b/Dockerfile.ascend
new file mode 100644
index 0000000..9ed5905
--- /dev/null
+++ b/Dockerfile.ascend
@@ -0,0 +1,6 @@
+# FROM quay.io/ascend/vllm-ascend:v0.10.0rc1
+FROM git.modelhub.org.cn:9443/enginex-ascend/vllm-ascend:v0.10.0rc1
+
+WORKDIR /workspace
+RUN pip install sentence-transformers
+COPY main.py dataset.json /workspace/
diff --git a/dataset.json b/dataset.json
new file mode 100644
index 0000000..488818d
--- /dev/null
+++ b/dataset.json
@@ -0,0 +1 @@
+["样例数据-1", "样例数据-2", "样例数据-3"]
diff --git a/main.py b/main.py
new file mode 100644
--- /dev/null
+++ b/main.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Encode sentences with a SentenceTransformer model and dump the results.
+
+Reads a JSON array of sentences, encodes it, and writes a JSON report that
+contains the embeddings and their pairwise cosine-similarity matrix.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+import time
+from pathlib import Path
+
+import numpy as np
+from sentence_transformers import SentenceTransformer
+
+
+def parse_args():
+    """Parse the command-line arguments of the script."""
+    p = argparse.ArgumentParser(
+        description="Encode sentences with SentenceTransformer and output embeddings & pairwise cosine similarity."
+    )
+    p.add_argument("input_json", help="输入文件路径(JSON,形如 ['句子1','句子2', ...])")
+    p.add_argument("output_json", help="输出文件路径(JSON)")
+    p.add_argument("model_path", help="模型路径或模型名,如 BAAI/bge-large-zh-v1.5 或本地目录")
+    p.add_argument("--device", default=None,
+                   help="设备:cuda / cpu / npu;默认自动检测(优先 cuda,其次 cpu;也可显式传 npu)")
+    p.add_argument("--batch-size", type=int, default=32, help="encode 批大小,默认 32")
+    p.add_argument("--no-normalize", action="store_true", help="不做 L2 归一化(默认会归一化)")
+    return p.parse_args()
+
+
+def auto_device(user_device: str | None) -> str:
+    """Return the device to run on.
+
+    An explicit ``user_device`` always wins. Otherwise "cuda" is chosen when
+    torch reports an available CUDA device, and "cpu" in every other case.
+    """
+    if user_device:
+        return user_device
+    try:
+        import torch
+        if torch.cuda.is_available():
+            return "cuda"
+    except Exception as exc:
+        # Detection is best effort: report the failure instead of hiding it,
+        # then fall back to the CPU.
+        print(f"warning: CUDA detection failed ({exc!r}); using cpu", file=sys.stderr)
+    return "cpu"
+
+
+def load_sentences(path: Path) -> list[str]:
+    """Load the input JSON array and return its items as strings.
+
+    Raises:
+        ValueError: if the top-level JSON value is not an array.
+    """
+    with path.open("r", encoding="utf-8") as f:
+        data = json.load(f)
+    if not isinstance(data, list):
+        raise ValueError("输入 JSON 必须是数组格式,如:['句子1', '句子2', ...]")
+    return [str(x) for x in data]
+
+
+def l2_normalize(embeddings: np.ndarray) -> np.ndarray:
+    """Return a row-wise L2-normalised copy of a 2-D ``embeddings`` array.
+
+    The input array is left untouched. The small epsilon added to each norm
+    keeps all-zero rows from causing a division by zero.
+    """
+    norms = np.linalg.norm(embeddings, axis=1, keepdims=True) + 1e-12
+    return embeddings / norms
+
+
+def main():
+    """Run the CLI: read sentences, encode them and write the JSON report."""
+    args = parse_args()
+    inp_path = Path(args.input_json)
+    out_path = Path(args.output_json)
+    model_path = args.model_path
+    device = auto_device(args.device)
+    normalize = not args.no_normalize
+
+    sentences = load_sentences(inp_path)
+    model = SentenceTransformer(model_path, device=device)
+
+    # Time only the encode call; perf_counter() is monotonic, unlike time().
+    t0 = time.perf_counter()
+    if sentences:
+        embeddings = model.encode(
+            sentences,
+            batch_size=args.batch_size,
+            normalize_embeddings=normalize,
+            convert_to_numpy=True,
+            device=device,
+        )
+    else:
+        # Nothing to encode: use an empty 2-D array so that "embeddings" and
+        # "similarity" are written as empty lists.
+        embeddings = np.empty((0, 0), dtype=np.float32)
+    encode_time = time.perf_counter() - t0
+
+    # Unit-length rows make the dot product equal to the cosine similarity.
+    # Normalise a copy so that --no-normalize still writes raw embeddings.
+    unit = embeddings if normalize else l2_normalize(embeddings)
+    similarity = unit @ unit.T
+
+    result = {
+        "model_path": model_path,
+        "device": device,
+        "count": len(sentences),
+        "dim": int(embeddings.shape[1]) if sentences and embeddings.ndim == 2 else None,
+        "encode_time_seconds": round(float(encode_time), 6),
+        "sentences": sentences,
+        "embeddings": embeddings.tolist(),  # [N, D]
+        "similarity": similarity.tolist(),  # [N, N]
+    }
+
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    with out_path.open("w", encoding="utf-8") as f:
+        json.dump(result, f, ensure_ascii=False, indent=2)
+
+    print(f"✅ Done. Saved to: {out_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/run_in_docker_ascend.sh b/run_in_docker_ascend.sh
new file mode 100755
--- /dev/null
+++ b/run_in_docker_ascend.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+image=harbor-contest.4pd.io/zhanghao/feature:ascend-0.1
+device=1
+docker run -it \
+    -v "$(pwd)":/host \
+    -e ASCEND_VISIBLE_DEVICES="${device}" \
+    -e NPU_VISIBLE_DEVICES="${device}" \
+    --device "/dev/davinci${device}:/dev/davinci0" \
+    --device /dev/davinci_manager \
+    --device /dev/devmm_svm \
+    --device /dev/hisi_hdc \
+    -v /mnt:/mnt \
+    -v /usr/local/dcmi:/usr/local/dcmi \
+    -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
+    -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
+    -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
+    -v /etc/ascend_install.info:/etc/ascend_install.info \
+    --privileged \
+    --entrypoint bash \
+    "$image"