5 Commits

Author SHA1 Message Date
zhousha
af6f501a5a update main.py 2026-06-18 15:22:29 +08:00
zhousha
94da35d152 clean up Dockerfile 2026-06-14 23:55:41 +08:00
zhousha
5b92f129d2 clean up Dockerfile 2026-06-14 23:54:02 +08:00
zhousha
87d4ae1c18 fix: add env vars to Dockerfile 2026-06-12 21:02:56 +08:00
zhousha
6f599a8a23 fix: add env vars to Dockerfile 2026-06-12 14:51:50 +08:00
2 changed files with 66 additions and 52 deletions

View File

@@ -1,5 +1,29 @@
# xc_validation_strategy # xc_validation_strategy
信创自动化模型适配平台 — 验证策略服务 批量向 ModelHub XC 平台提交模型验证任务的策略服务,之后保持 HTTP 服务存活供平台探活。
从 HuggingFace 周期性抓取新模型,自动完成同步、下载、提交验证任务的全流程,常驻运行在 xc_agent_platform 上。 ## 功能
- 自动登录 ModelHub 获取 Token
- 批量提交模型验证任务(vLLM 框架,Cambricon MLU-370-x8)
- 提交结果写入 `submitted_validation_tasks.txt`
- 暴露 `/health`、`/status` 接口,满足平台运行时契约
## 项目结构
```
.
├── main.py # 主入口(HTTP 服务 + 提交逻辑)
├── Dockerfile # 平台镜像构建配置
├── requirements.txt # Python 依赖
└── submitted_validation_tasks.txt # 运行后自动生成,记录提交结果
```
## 平台契约说明
本项目满足平台对策略镜像的全部必要约束:
- Dockerfile 位于仓库根目录,基于官方轻量基础镜像
- 暴露 8080 端口并实现 `GET /health`
- 通过环境变量 `STRATEGY_ID` 获取策略 ID
- 正确处理 `SIGTERM` 信号,支持优雅停机

90
main.py
View File

@@ -9,7 +9,6 @@ import json
import os import os
import signal import signal
import threading import threading
import traceback
from datetime import datetime from datetime import datetime
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import List, Tuple from typing import List, Tuple
@@ -20,16 +19,15 @@ import requests
# 配置(全部从环境变量读取,不硬编码敏感信息) # 配置(全部从环境变量读取,不硬编码敏感信息)
# ══════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════
BASE_URL = os.environ.get("BASE_URL", "https://modelhub.org.cn") BASE_URL = os.environ.get("BASE_URL", "https://modelhub.org.cn")
LOGIN_ENDPOINT = "/adminApi/user/login"
SUBMIT_ENDPOINT = "/adminApi/async/task/create-contest-task" SUBMIT_ENDPOINT = "/adminApi/async/task/create-contest-task"
USER_ACCOUNT = os.environ["USER_ACCOUNT"] # 必填 # 通过 curl -X POST https://modelhub.org.cn/adminApi/user/login 获取后填入
USER_PASSWORD = os.environ["USER_PASSWORD"] # 必填 AUTH_TOKEN = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyQWNjb3VudCI6Inpob3VzaGFzaGEiLCJpZCI6MTQsInVzZXJSb2xlIjoibGVhZGVyYm9hcmQiLCJleHAiOjE3ODE4NTE0NzcsImlhdCI6MTc4MTI0NjY3N30.p3uvCpG50aLNifNVVXxvzmWJahbLM5K1671FVCtj8E8"
CONTEST_API_TOKEN = os.environ["CONTEST_API_TOKEN"] # 必填 CONTEST_API_TOKEN = "ef1ef82f3c9efee413d602345fbe224d"
STRATEGY_ID = os.environ.get("STRATEGY_ID", "") # 平台注入 CONTRIBUTORS = "zhoushasha"
CONTRIBUTORS = os.environ.get("CONTRIBUTORS", USER_ACCOUNT) GPU_TYPE = "Cambricon_mlu-370-x8"
GPU_TYPE = os.environ.get("GPU_TYPE", "Cambricon_mlu-370-x8") TASK_TYPE = "text-generation"
TASK_TYPE = os.environ.get("TASK_TYPE", "text-generation") STRATEGY_ID = os.environ.get("STRATEGY_ID", "") # 平台自动注入,无需修改
HTTP_HOST = "0.0.0.0" HTTP_HOST = "0.0.0.0"
HTTP_PORT = 8080 HTTP_PORT = 8080
@@ -38,26 +36,36 @@ HTTP_PORT = 8080
# 模型列表 # 模型列表
# ══════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════
ALL_MODEL_IDS = [ ALL_MODEL_IDS = [
"AI-ModelScope/gemma-2b", "l3utterfly/mistral-7b-v0.1-layla-v4",
"AI-ModelScope/falcon-mamba-7b", "OpenBuddy/openbuddy-mistral-7b-v13.1",
"katanemo/deepseek-2", "allenai/truthfulqa-info-judge-llama2-7B",
"OpenBMB/MiniCPM4-0.5B", "l3utterfly/mistral-7b-v0.1-layla-v1",
"NousResearch/Meta-Llama-3-8B-Instruct", "l3utterfly/minima-3b-layla-v2",
"MediaTek-Research/Breeze-7B-Instruct-v1_0", "l3utterfly/tinyllama-1.1b-layla-v4",
"QLUNLP/BianCang-Qwen2.5-7B-Instruct", "l3utterfly/mistral-7b-v0.1-layla-v2",
"OpenBMB/MiniCPM4-Survey", "l3utterfly/tinyllama-1.1b-layla-v1",
"OpenBMB/MiniCPM4-8B", "Duxiaoman-DI/XuanYuan-13B-Chat",
"PaddlePaddle/ERNIE-4.5-0.3B-PT", "l3utterfly/minima-3b-layla-v1",
"LLM-Research/Llama-Guard-3-8B", "AI-ModelScope/gemma-2-2b",
"OpenBMB/MiniCPM-2B-dpo-fp16", "baichuan-inc/Baichuan-13B-Base",
"OpenBMB/MiniCPM4.1-8B", "LGAI-EXAONE/EXAONE-Deep-2.4B",
"Cylingo/Xinyuan-LLM-14B-0428", "NousResearch/DeepHermes-3-Llama-3-3B-Preview",
"Fengshenbang/Ziya-LLaMA-13B-v1", "Fengshenbang/Ziya2-13B-Base",
"baichuan-inc/Baichuan2-13B-Chat", "prithivMLmods/QwQ-MathOct-7B",
"LLM-Research/gemma-2-9b-it", "l3utterfly/phi-2-layla-v1-chatml",
"Qwen/CodeQwen1.5-7B-Chat", "argilla/notus-7b-v1",
"OpenBMB/cpm-bee-10b", "prithivMLmods/Doopler-Augment-3B-Cox",
"OpenBMB/MiniCPM3-4B", "prithivMLmods/Blaze.1-32B-Instruct",
"CohereLabs/aya-expanse-8B",
"Magpie-Align/MagpieLM-4B-SFT-v0.1",
"Magpie-Align/MagpieLM-8B-SFT-v0.1",
"Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.2",
"Magpie-Align/MagpieLM-8B-Chat-v0.1",
"Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1",
"Magpie-Align/Llama-3-8B-Magpie-Air-SFT-300K-v0.1",
"prithivMLmods/Tulu-MathLingo-8B",
"prithivMLmods/Triangulum-5B",
"prithivMLmods/Viper-Coder-v0.1",
] ]
# ══════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════
@@ -110,21 +118,6 @@ def _run_http():
# ══════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════
# 业务逻辑 # 业务逻辑
# ══════════════════════════════════════════════════════════ # ══════════════════════════════════════════════════════════
def _login() -> str:
headers = {"Content-Type": "application/json"}
resp = requests.post(
BASE_URL + LOGIN_ENDPOINT,
headers=headers,
json={"userAccount": USER_ACCOUNT, "userPassword": USER_PASSWORD},
timeout=30,
)
data = resp.json()
if data.get("code") != 0:
raise RuntimeError(f"登录失败: {data.get('message')}")
print("[worker] 登录成功", flush=True)
return data["data"]["token"]
def _submit_task(token: str, model_id: str) -> Tuple[bool, str]: def _submit_task(token: str, model_id: str) -> Tuple[bool, str]:
headers = { headers = {
"Content-Type": "application/json", "Content-Type": "application/json",
@@ -167,6 +160,7 @@ ref_config:
"taskType": TASK_TYPE, "taskType": TASK_TYPE,
}], }],
} }
print(f"[payload] {json.dumps(payload, indent=2, ensure_ascii=False)}", flush=True)
try: try:
resp = requests.post( resp = requests.post(
BASE_URL + SUBMIT_ENDPOINT, BASE_URL + SUBMIT_ENDPOINT,
@@ -192,12 +186,8 @@ def _run_worker():
_state["phase"] = "submitting" _state["phase"] = "submitting"
successful: List[Tuple[str, str]] = [] successful: List[Tuple[str, str]] = []
try: token = AUTH_TOKEN
token = _login() print("[worker] 使用预设 Token跳过登录", flush=True)
except Exception:
traceback.print_exc()
_state["phase"] = "error"
return
for model_id in ALL_MODEL_IDS: for model_id in ALL_MODEL_IDS:
if _shutdown.is_set(): if _shutdown.is_set():