Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
af6f501a5a | ||
|
|
94da35d152 | ||
|
|
5b92f129d2 | ||
|
|
87d4ae1c18 |
@@ -1,11 +1,6 @@
|
|||||||
FROM modelhubxc-4pd.tencentcloudcr.com/xc_agent_platform/python:3.11-slim
|
FROM modelhubxc-4pd.tencentcloudcr.com/xc_agent_platform/python:3.11-slim
|
||||||
|
|
||||||
ENV PYTHONUNBUFFERED=1 \
|
ENV PYTHONUNBUFFERED=1
|
||||||
USER_ACCOUNT="zhoushasha@4paradigm.com" \
|
|
||||||
USER_PASSWORD="4pdpassword" \
|
|
||||||
CONTEST_API_TOKEN="ef1ef82f3c9efee413d602345fbe224d" \
|
|
||||||
CONTRIBUTORS="zhoushasha" \
|
|
||||||
GPU_TYPE="Cambricon_mlu-370-x8"
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
|||||||
28
README.md
28
README.md
@@ -1,5 +1,29 @@
|
|||||||
# xc_validation_strategy
|
# xc_validation_strategy
|
||||||
|
|
||||||
信创自动化模型适配平台 — 验证策略服务
|
批量向 ModelHub XC 平台提交模型验证任务的策略服务,之后保持 HTTP 服务存活供平台探活。
|
||||||
|
|
||||||
从 HuggingFace 周期性抓取新模型,自动完成同步、下载、提交验证任务的全流程,常驻运行在 xc_agent_platform 上。
|
## 功能
|
||||||
|
|
||||||
|
- 自动登录 ModelHub 获取 Token
|
||||||
|
- 批量提交模型验证任务(vLLM 框架,Cambricon MLU-370-x8)
|
||||||
|
- 提交结果写入 `submitted_validation_tasks.txt`
|
||||||
|
- 暴露 `/health` 和 `/status` 接口满足平台运行时契约
|
||||||
|
|
||||||
|
## 项目结构
|
||||||
|
|
||||||
|
```
|
||||||
|
.
|
||||||
|
├── main.py # 主入口:HTTP 服务 + 提交逻辑
|
||||||
|
├── Dockerfile # 平台镜像构建配置
|
||||||
|
├── requirements.txt # Python 依赖
|
||||||
|
└── submitted_validation_tasks.txt # 运行后自动生成,记录提交结果
|
||||||
|
```
|
||||||
|
|
||||||
|
## 平台契约说明
|
||||||
|
|
||||||
|
本项目满足平台对策略镜像的全部必要约束:
|
||||||
|
|
||||||
|
- Dockerfile 位于仓库根目录,基于官方轻量基础镜像
|
||||||
|
- 暴露 8080 端口并实现 `GET /health`
|
||||||
|
- 通过环境变量 `STRATEGY_ID` 获取策略 ID
|
||||||
|
- 正确处理 `SIGTERM` 信号,支持优雅停机
|
||||||
90
main.py
90
main.py
@@ -9,7 +9,6 @@ import json
|
|||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import threading
|
import threading
|
||||||
import traceback
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
@@ -20,16 +19,15 @@ import requests
|
|||||||
# 配置(全部从环境变量读取,不硬编码敏感信息)
|
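# Configuration. NOTE: only BASE_URL and STRATEGY_ID are read from environment variables here; AUTH_TOKEN, CONTEST_API_TOKEN, CONTRIBUTORS, GPU_TYPE and TASK_TYPE are hard-coded below. Secrets should be supplied via environment variables rather than committed.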
|
||||||
# ══════════════════════════════════════════════════════════
|
# ══════════════════════════════════════════════════════════
|
||||||
BASE_URL = os.environ.get("BASE_URL", "https://modelhub.org.cn")
|
BASE_URL = os.environ.get("BASE_URL", "https://modelhub.org.cn")
|
||||||
LOGIN_ENDPOINT = "/adminApi/user/login"
|
|
||||||
SUBMIT_ENDPOINT = "/adminApi/async/task/create-contest-task"
|
SUBMIT_ENDPOINT = "/adminApi/async/task/create-contest-task"
|
||||||
|
|
||||||
USER_ACCOUNT = os.environ["USER_ACCOUNT"] # 必填
|
# 通过 curl -X POST https://modelhub.org.cn/adminApi/user/login 获取后填入
|
||||||
USER_PASSWORD = os.environ["USER_PASSWORD"] # 必填
|
AUTH_TOKEN = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyQWNjb3VudCI6Inpob3VzaGFzaGEiLCJpZCI6MTQsInVzZXJSb2xlIjoibGVhZGVyYm9hcmQiLCJleHAiOjE3ODE4NTE0NzcsImlhdCI6MTc4MTI0NjY3N30.p3uvCpG50aLNifNVVXxvzmWJahbLM5K1671FVCtj8E8"
|
||||||
CONTEST_API_TOKEN = os.environ["CONTEST_API_TOKEN"] # 必填
|
CONTEST_API_TOKEN = "ef1ef82f3c9efee413d602345fbe224d"
|
||||||
STRATEGY_ID = os.environ.get("STRATEGY_ID", "") # 平台注入
|
CONTRIBUTORS = "zhoushasha"
|
||||||
CONTRIBUTORS = os.environ.get("CONTRIBUTORS", USER_ACCOUNT)
|
GPU_TYPE = "Cambricon_mlu-370-x8"
|
||||||
GPU_TYPE = os.environ.get("GPU_TYPE", "Cambricon_mlu-370-x8")
|
TASK_TYPE = "text-generation"
|
||||||
TASK_TYPE = os.environ.get("TASK_TYPE", "text-generation")
|
STRATEGY_ID = os.environ.get("STRATEGY_ID", "") # 平台自动注入,无需修改
|
||||||
|
|
||||||
HTTP_HOST = "0.0.0.0"
|
HTTP_HOST = "0.0.0.0"
|
||||||
HTTP_PORT = 8080
|
HTTP_PORT = 8080
|
||||||
@@ -38,26 +36,36 @@ HTTP_PORT = 8080
|
|||||||
# 模型列表
|
# 模型列表
|
||||||
# ══════════════════════════════════════════════════════════
|
# ══════════════════════════════════════════════════════════
|
||||||
ALL_MODEL_IDS = [
|
ALL_MODEL_IDS = [
|
||||||
"AI-ModelScope/gemma-2b",
|
"l3utterfly/mistral-7b-v0.1-layla-v4",
|
||||||
"AI-ModelScope/falcon-mamba-7b",
|
"OpenBuddy/openbuddy-mistral-7b-v13.1",
|
||||||
"katanemo/deepseek-2",
|
"allenai/truthfulqa-info-judge-llama2-7B",
|
||||||
"OpenBMB/MiniCPM4-0.5B",
|
"l3utterfly/mistral-7b-v0.1-layla-v1",
|
||||||
"NousResearch/Meta-Llama-3-8B-Instruct",
|
"l3utterfly/minima-3b-layla-v2",
|
||||||
"MediaTek-Research/Breeze-7B-Instruct-v1_0",
|
"l3utterfly/tinyllama-1.1b-layla-v4",
|
||||||
"QLUNLP/BianCang-Qwen2.5-7B-Instruct",
|
"l3utterfly/mistral-7b-v0.1-layla-v2",
|
||||||
"OpenBMB/MiniCPM4-Survey",
|
"l3utterfly/tinyllama-1.1b-layla-v1",
|
||||||
"OpenBMB/MiniCPM4-8B",
|
"Duxiaoman-DI/XuanYuan-13B-Chat",
|
||||||
"PaddlePaddle/ERNIE-4.5-0.3B-PT",
|
"l3utterfly/minima-3b-layla-v1",
|
||||||
"LLM-Research/Llama-Guard-3-8B",
|
"AI-ModelScope/gemma-2-2b",
|
||||||
"OpenBMB/MiniCPM-2B-dpo-fp16",
|
"baichuan-inc/Baichuan-13B-Base",
|
||||||
"OpenBMB/MiniCPM4.1-8B",
|
"LGAI-EXAONE/EXAONE-Deep-2.4B",
|
||||||
"Cylingo/Xinyuan-LLM-14B-0428",
|
"NousResearch/DeepHermes-3-Llama-3-3B-Preview",
|
||||||
"Fengshenbang/Ziya-LLaMA-13B-v1",
|
"Fengshenbang/Ziya2-13B-Base",
|
||||||
"baichuan-inc/Baichuan2-13B-Chat",
|
"prithivMLmods/QwQ-MathOct-7B",
|
||||||
"LLM-Research/gemma-2-9b-it",
|
"l3utterfly/phi-2-layla-v1-chatml",
|
||||||
"Qwen/CodeQwen1.5-7B-Chat",
|
"argilla/notus-7b-v1",
|
||||||
"OpenBMB/cpm-bee-10b",
|
"prithivMLmods/Doopler-Augment-3B-Cox",
|
||||||
"OpenBMB/MiniCPM3-4B",
|
"prithivMLmods/Blaze.1-32B-Instruct",
|
||||||
|
"CohereLabs/aya-expanse-8B",
|
||||||
|
"Magpie-Align/MagpieLM-4B-SFT-v0.1",
|
||||||
|
"Magpie-Align/MagpieLM-8B-SFT-v0.1",
|
||||||
|
"Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.2",
|
||||||
|
"Magpie-Align/MagpieLM-8B-Chat-v0.1",
|
||||||
|
"Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1",
|
||||||
|
"Magpie-Align/Llama-3-8B-Magpie-Air-SFT-300K-v0.1",
|
||||||
|
"prithivMLmods/Tulu-MathLingo-8B",
|
||||||
|
"prithivMLmods/Triangulum-5B",
|
||||||
|
"prithivMLmods/Viper-Coder-v0.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
# ══════════════════════════════════════════════════════════
|
# ══════════════════════════════════════════════════════════
|
||||||
@@ -110,21 +118,6 @@ def _run_http():
|
|||||||
# ══════════════════════════════════════════════════════════
|
# ══════════════════════════════════════════════════════════
|
||||||
# 业务逻辑
|
# 业务逻辑
|
||||||
# ══════════════════════════════════════════════════════════
|
# ══════════════════════════════════════════════════════════
|
||||||
def _login() -> str:
    """Log in to the ModelHub admin API and return the auth token.

    POSTs ``USER_ACCOUNT`` / ``USER_PASSWORD`` as a JSON body to
    ``BASE_URL + LOGIN_ENDPOINT`` with a 30-second timeout.

    Returns:
        The token string found at ``data.token`` in the response body.

    Raises:
        RuntimeError: If the response body is not valid JSON, is not a JSON
            object, reports a non-zero ``code``, or carries no token.
        requests.RequestException: On transport-level failures (connection
            error, timeout); propagated unchanged from ``requests.post``.
    """
    resp = requests.post(
        BASE_URL + LOGIN_ENDPOINT,
        headers={"Content-Type": "application/json"},
        json={"userAccount": USER_ACCOUNT, "userPassword": USER_PASSWORD},
        timeout=30,
    )

    # A gateway error page or an empty body is not JSON; report it as a login
    # failure with the HTTP status instead of leaking a bare decode error.
    try:
        data = resp.json()
    except ValueError as err:
        raise RuntimeError(
            f"登录失败: HTTP {resp.status_code}, 响应不是合法 JSON"
        ) from err

    # Guard against a JSON body that is not an object (e.g. a list or string),
    # on which ``.get`` would not exist.
    if not isinstance(data, dict):
        raise RuntimeError(f"登录失败: HTTP {resp.status_code}, 响应格式异常")

    if data.get("code") != 0:
        raise RuntimeError(f"登录失败: {data.get('message')}")

    # ``data`` may be missing or null even when ``code`` is 0; fail with a
    # clear message rather than a KeyError/TypeError.
    body = data.get("data")
    token = body.get("token") if isinstance(body, dict) else None
    if not token:
        raise RuntimeError("登录失败: 响应中缺少 token")

    # Only announce success once a token is actually in hand.
    print("[worker] 登录成功", flush=True)
    return token
|
|
||||||
|
|
||||||
|
|
||||||
def _submit_task(token: str, model_id: str) -> Tuple[bool, str]:
|
def _submit_task(token: str, model_id: str) -> Tuple[bool, str]:
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
@@ -167,6 +160,7 @@ ref_config:
|
|||||||
"taskType": TASK_TYPE,
|
"taskType": TASK_TYPE,
|
||||||
}],
|
}],
|
||||||
}
|
}
|
||||||
|
print(f"[payload] {json.dumps(payload, indent=2, ensure_ascii=False)}", flush=True)
|
||||||
try:
|
try:
|
||||||
resp = requests.post(
|
resp = requests.post(
|
||||||
BASE_URL + SUBMIT_ENDPOINT,
|
BASE_URL + SUBMIT_ENDPOINT,
|
||||||
@@ -192,12 +186,8 @@ def _run_worker():
|
|||||||
_state["phase"] = "submitting"
|
_state["phase"] = "submitting"
|
||||||
|
|
||||||
successful: List[Tuple[str, str]] = []
|
successful: List[Tuple[str, str]] = []
|
||||||
try:
|
token = AUTH_TOKEN
|
||||||
token = _login()
|
print("[worker] 使用预设 Token,跳过登录", flush=True)
|
||||||
except Exception:
|
|
||||||
traceback.print_exc()
|
|
||||||
_state["phase"] = "error"
|
|
||||||
return
|
|
||||||
|
|
||||||
for model_id in ALL_MODEL_IDS:
|
for model_id in ALL_MODEL_IDS:
|
||||||
if _shutdown.is_set():
|
if _shutdown.is_set():
|
||||||
|
|||||||
Reference in New Issue
Block a user