5 Commits

Author SHA1 Message Date
zhousha
af6f501a5a update main.py 2026-06-18 15:22:29 +08:00
zhousha
94da35d152 clean up Dockerfile 2026-06-14 23:55:41 +08:00
zhousha
5b92f129d2 clean up Dockerfile 2026-06-14 23:54:02 +08:00
zhousha
87d4ae1c18 fix: add env vars to Dockerfile 2026-06-12 21:02:56 +08:00
zhousha
6f599a8a23 fix: add env vars to Dockerfile 2026-06-12 14:51:50 +08:00
2 changed files with 66 additions and 52 deletions

View File

@@ -1,5 +1,29 @@
# xc_validation_strategy
信创自动化模型适配平台 — 验证策略服务
批量向 ModelHub XC 平台提交模型验证任务的策略服务,之后保持 HTTP 服务存活供平台探活。
从 HuggingFace 周期性抓取新模型,自动完成同步、下载、提交验证任务的全流程,常驻运行在 xc_agent_platform 上。
## 功能
- 自动登录 ModelHub 获取 Token
- 批量提交模型验证任务(vLLM 框架,Cambricon MLU-370-x8)
- 提交结果写入 `submitted_validation_tasks.txt`
- 暴露 `/health`、`/status` 接口,满足平台运行时契约
## 项目结构
```
.
├── main.py # 主入口:HTTP 服务 + 提交逻辑
├── Dockerfile # 平台镜像构建配置
├── requirements.txt # Python 依赖
└── submitted_validation_tasks.txt # 运行后自动生成,记录提交结果
```
## 平台契约说明
本项目满足平台对策略镜像的全部必要约束:
- Dockerfile 位于仓库根目录,基于官方轻量基础镜像
- 暴露 8080 端口并实现 `GET /health`
- 通过环境变量 `STRATEGY_ID` 获取策略 ID
- 正确处理 `SIGTERM` 信号,支持优雅停机

90
main.py
View File

@@ -9,7 +9,6 @@ import json
import os
import signal
import threading
import traceback
from datetime import datetime
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import List, Tuple
@@ -20,16 +19,15 @@ import requests
# Configuration (everything is read from environment variables; no secrets are hard-coded)
# ══════════════════════════════════════════════════════════
BASE_URL = os.environ.get("BASE_URL", "https://modelhub.org.cn")
LOGIN_ENDPOINT = "/adminApi/user/login"
SUBMIT_ENDPOINT = "/adminApi/async/task/create-contest-task"

# Required credentials: a missing variable fails fast with KeyError at import
# time instead of producing unauthenticated requests later.
USER_ACCOUNT = os.environ["USER_ACCOUNT"]
USER_PASSWORD = os.environ["USER_PASSWORD"]
CONTEST_API_TOKEN = os.environ["CONTEST_API_TOKEN"]

# Optional pre-issued login token, e.g. obtained with
#   curl -X POST https://modelhub.org.cn/adminApi/user/login
# SECURITY: never paste the token (or CONTEST_API_TOKEN) into this file. A
# literal leaks the credential to everyone with repository access and goes
# stale as soon as the JWT expires. Any credential that was ever committed
# must be rotated.
AUTH_TOKEN = os.environ.get("AUTH_TOKEN", "")

# Injected automatically by the platform; no need to set it by hand.
STRATEGY_ID = os.environ.get("STRATEGY_ID", "")

# Each name is assigned exactly once so an environment override is never
# silently clobbered by a later hard-coded value.
CONTRIBUTORS = os.environ.get("CONTRIBUTORS", USER_ACCOUNT)
GPU_TYPE = os.environ.get("GPU_TYPE", "Cambricon_mlu-370-x8")
TASK_TYPE = os.environ.get("TASK_TYPE", "text-generation")

# Bind on all interfaces so the platform health probe can reach the container.
HTTP_HOST = "0.0.0.0"
HTTP_PORT = 8080
@@ -38,26 +36,36 @@ HTTP_PORT = 8080
# Model list
# ══════════════════════════════════════════════════════════
# Model identifiers in "<namespace>/<model-name>" form. The worker loop
# iterates this list in order (`for model_id in ALL_MODEL_IDS`), so list order
# is submission order.
# NOTE(review): this view appears to interleave entries removed and entries
# added by the commit — confirm the final list against the checked-out file.
ALL_MODEL_IDS = [
"AI-ModelScope/gemma-2b",
"AI-ModelScope/falcon-mamba-7b",
"katanemo/deepseek-2",
"OpenBMB/MiniCPM4-0.5B",
"NousResearch/Meta-Llama-3-8B-Instruct",
"MediaTek-Research/Breeze-7B-Instruct-v1_0",
"QLUNLP/BianCang-Qwen2.5-7B-Instruct",
"OpenBMB/MiniCPM4-Survey",
"OpenBMB/MiniCPM4-8B",
"PaddlePaddle/ERNIE-4.5-0.3B-PT",
"LLM-Research/Llama-Guard-3-8B",
"OpenBMB/MiniCPM-2B-dpo-fp16",
"OpenBMB/MiniCPM4.1-8B",
"Cylingo/Xinyuan-LLM-14B-0428",
"Fengshenbang/Ziya-LLaMA-13B-v1",
"baichuan-inc/Baichuan2-13B-Chat",
"LLM-Research/gemma-2-9b-it",
"Qwen/CodeQwen1.5-7B-Chat",
"OpenBMB/cpm-bee-10b",
"OpenBMB/MiniCPM3-4B",
"l3utterfly/mistral-7b-v0.1-layla-v4",
"OpenBuddy/openbuddy-mistral-7b-v13.1",
"allenai/truthfulqa-info-judge-llama2-7B",
"l3utterfly/mistral-7b-v0.1-layla-v1",
"l3utterfly/minima-3b-layla-v2",
"l3utterfly/tinyllama-1.1b-layla-v4",
"l3utterfly/mistral-7b-v0.1-layla-v2",
"l3utterfly/tinyllama-1.1b-layla-v1",
"Duxiaoman-DI/XuanYuan-13B-Chat",
"l3utterfly/minima-3b-layla-v1",
"AI-ModelScope/gemma-2-2b",
"baichuan-inc/Baichuan-13B-Base",
"LGAI-EXAONE/EXAONE-Deep-2.4B",
"NousResearch/DeepHermes-3-Llama-3-3B-Preview",
"Fengshenbang/Ziya2-13B-Base",
"prithivMLmods/QwQ-MathOct-7B",
"l3utterfly/phi-2-layla-v1-chatml",
"argilla/notus-7b-v1",
"prithivMLmods/Doopler-Augment-3B-Cox",
"prithivMLmods/Blaze.1-32B-Instruct",
"CohereLabs/aya-expanse-8B",
"Magpie-Align/MagpieLM-4B-SFT-v0.1",
"Magpie-Align/MagpieLM-8B-SFT-v0.1",
"Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.2",
"Magpie-Align/MagpieLM-8B-Chat-v0.1",
"Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1",
"Magpie-Align/Llama-3-8B-Magpie-Air-SFT-300K-v0.1",
"prithivMLmods/Tulu-MathLingo-8B",
"prithivMLmods/Triangulum-5B",
"prithivMLmods/Viper-Coder-v0.1",
]
# ══════════════════════════════════════════════════════════
@@ -110,21 +118,6 @@ def _run_http():
# ══════════════════════════════════════════════════════════
# 业务逻辑
# ══════════════════════════════════════════════════════════
def _login() -> str:
    """Log in to the ModelHub admin API and return the session token.

    Posts ``USER_ACCOUNT`` / ``USER_PASSWORD`` as JSON to
    ``BASE_URL + LOGIN_ENDPOINT`` with a 30-second timeout.

    Returns:
        The token found at ``data.token`` in the JSON response.

    Raises:
        RuntimeError: If the response body is not JSON, the API reports a
            non-zero ``code``, or the response carries no token.
        requests.RequestException: On connection errors or timeout.
    """
    resp = requests.post(
        BASE_URL + LOGIN_ENDPOINT,
        headers={"Content-Type": "application/json"},
        json={"userAccount": USER_ACCOUNT, "userPassword": USER_PASSWORD},
        timeout=30,
    )

    # A gateway error page (HTML / empty body) would otherwise surface as an
    # opaque JSON decode error; report the HTTP status instead.
    try:
        data = resp.json()
    except ValueError as err:
        raise RuntimeError(
            f"登录失败: HTTP {resp.status_code}, 响应不是合法 JSON"
        ) from err

    if data.get("code") != 0:
        raise RuntimeError(f"登录失败: {data.get('message')}")

    # Guard against a "success" envelope whose data is null or lacks the
    # token, and only log success once a usable token is actually in hand.
    token = (data.get("data") or {}).get("token")
    if not token:
        raise RuntimeError("登录失败: 响应中缺少 token")

    print("[worker] 登录成功", flush=True)
    return token
def _submit_task(token: str, model_id: str) -> Tuple[bool, str]:
headers = {
"Content-Type": "application/json",
@@ -167,6 +160,7 @@ ref_config:
"taskType": TASK_TYPE,
}],
}
print(f"[payload] {json.dumps(payload, indent=2, ensure_ascii=False)}", flush=True)
try:
resp = requests.post(
BASE_URL + SUBMIT_ENDPOINT,
@@ -192,12 +186,8 @@ def _run_worker():
_state["phase"] = "submitting"
successful: List[Tuple[str, str]] = []
try:
token = _login()
except Exception:
traceback.print_exc()
_state["phase"] = "error"
return
token = AUTH_TOKEN
print("[worker] 使用预设 Token跳过登录", flush=True)
for model_id in ALL_MODEL_IDS:
if _shutdown.is_set():