From f0425d03b41c63194ff9ddaaf09761fb9b9832d2 Mon Sep 17 00:00:00 2001
From: ZHANG Hao <zhanghao@4paradigm.com>
Date: Wed, 3 Sep 2025 10:43:22 +0800
Subject: [PATCH] bge embedding support

---
 Dockerfile.ascend       |  6 +++
 dataset.json            |  1 +
 main.py                 | 93 +++++++++++++++++++++++++++++++++++++++++
 run_in_docker_ascend.sh |  4 ++
 4 files changed, 104 insertions(+)
 create mode 100644 Dockerfile.ascend
 create mode 100644 dataset.json
 create mode 100644 main.py
 create mode 100755 run_in_docker_ascend.sh

diff --git a/Dockerfile.ascend b/Dockerfile.ascend
new file mode 100644
index 0000000..9ed5905
--- /dev/null
+++ b/Dockerfile.ascend
@@ -0,0 +1,6 @@
+# Alternative source with the same image name and tag: quay.io/ascend/vllm-ascend:v0.10.0rc1
+FROM git.modelhub.org.cn:9443/enginex-ascend/vllm-ascend:v0.10.0rc1
+
+WORKDIR /workspace
+RUN pip install --no-cache-dir sentence-transformers
+COPY main.py dataset.json /workspace/
diff --git a/dataset.json b/dataset.json
new file mode 100644
index 0000000..488818d
--- /dev/null
+++ b/dataset.json
@@ -0,0 +1 @@
+["样例数据-1", "样例数据-2", "样例数据-3"]
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..40e0eab
--- /dev/null
+++ b/main.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import json
+import time
+from pathlib import Path
+import numpy as np
+from sentence_transformers import SentenceTransformer
+
+def parse_args():
+    """Define the CLI (three positionals plus three options) and parse ``sys.argv``."""
+    summary = "Encode sentences with SentenceTransformer and output embeddings & pairwise cosine similarity."
+    parser = argparse.ArgumentParser(description=summary)
+    parser.add_argument("input_json", help="输入文件路径（JSON，形如 ['句子1','句子2', ...]）")
+    parser.add_argument("output_json", help="输出文件路径（JSON）")
+    parser.add_argument("model_path", help="模型路径或模型名，如 BAAI/bge-large-zh-v1.5 或本地目录")
+    device_help = "设备：cuda / cpu / npu；默认自动检测（优先 cuda，其次 cpu；也可显式传 npu）"
+    parser.add_argument("--device", default=None, help=device_help)
+    parser.add_argument("--batch-size", type=int, default=32, help="encode 批大小，默认 32")
+    parser.add_argument("--no-normalize", action="store_true", help="不做 L2 归一化（默认会归一化）")
+    return parser.parse_args()
+
+def auto_device(user_device: str | None) -> str:
+    """Return ``user_device`` when given; otherwise "cuda" if torch reports CUDA, else "cpu"."""
+    if not user_device:
+        try:
+            import torch
+            cuda_ready = torch.cuda.is_available()
+        except Exception:  # best-effort probe: any failure falls back to CPU
+            cuda_ready = False
+        return "cuda" if cuda_ready else "cpu"
+    return user_device
+
+def main():
+    """Encode the input sentences; write embeddings and pairwise cosine similarity as JSON."""
+    args = parse_args()
+    inp_path = Path(args.input_json)
+    out_path = Path(args.output_json)
+    device = auto_device(args.device)
+    normalize = not args.no_normalize
+
+    # Read the input: it must be a JSON array; every element is coerced to str.
+    with inp_path.open("r", encoding="utf-8") as f:
+        data = json.load(f)
+    if not isinstance(data, list):
+        raise ValueError("输入 JSON 必须是数组格式，如：['句子1', '句子2', ...]")
+    sentences = [str(x) for x in data]
+
+    # Load the model onto the selected device.
+    model = SentenceTransformer(args.model_path, device=device)
+
+    # Encode and time it; perf_counter is monotonic, so clock adjustments cannot skew it.
+    t0 = time.perf_counter()
+    embeddings = model.encode(
+        sentences,
+        batch_size=args.batch_size,
+        normalize_embeddings=normalize,
+        convert_to_numpy=True,
+        device=device
+    )
+    encode_time = time.perf_counter() - t0
+
+    # With --no-normalize, normalize a separate array for the similarity only; raw vectors are saved.
+    unit = embeddings
+    if not normalize:
+        unit = embeddings / (np.linalg.norm(embeddings, axis=1, keepdims=True) + 1e-12)
+
+    # Pairwise cosine similarity: the dot product of unit vectors is their cosine.
+    similarity = unit @ unit.T
+
+    # Assemble the output document.
+    result = {
+        "model_path": args.model_path,
+        "device": device,
+        "count": len(sentences),
+        "dim": int(embeddings.shape[1]) if len(embeddings.shape) == 2 else None,
+        "encode_time_seconds": round(float(encode_time), 6),
+        "sentences": sentences,
+        "embeddings": embeddings.tolist(),          # [N, D]
+        "similarity": similarity.tolist()           # [N, N]
+    }
+
+    # Save, creating the output directory first if it does not exist.
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    with out_path.open("w", encoding="utf-8") as f:
+        json.dump(result, f, ensure_ascii=False, indent=2)
+
+    print(f"✅ Done. Saved to: {out_path}")
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()
+
diff --git a/run_in_docker_ascend.sh b/run_in_docker_ascend.sh
new file mode 100755
index 0000000..e1587c3
--- /dev/null
+++ b/run_in_docker_ascend.sh
@@ -0,0 +1,4 @@
+#! /usr/bin/env bash
+image=harbor-contest.4pd.io/zhanghao/feature:ascend-0.1
+device=1  # host NPU index; the host's /dev/davinci$device is exposed as /dev/davinci0 in the container
+docker run -it -v "$(pwd)":/host -e ASCEND_VISIBLE_DEVICES="$device" -e NPU_VISIBLE_DEVICES="${device}" --device "/dev/davinci$device:/dev/davinci0" --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc -v /mnt:/mnt -v /usr/local/dcmi:/usr/local/dcmi -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info -v /etc/ascend_install.info:/etc/ascend_install.info --privileged --entrypoint bash "$image"