"""xc_validation_strategy -- main entry point.

On startup, submit the model-validation tasks in one batch, then keep the
HTTP service alive. Exposes /health (K8s liveness probe) and /status
(runtime state).
"""

import json
import os
import signal
import threading
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import List, Tuple

import requests

# ══════════════════════════════════════════════════════════
# Configuration (secrets are read from environment variables, never
# hard-coded in source)
# ══════════════════════════════════════════════════════════

BASE_URL = os.environ.get("BASE_URL", "https://modelhub.org.cn")
SUBMIT_ENDPOINT = "/adminApi/async/task/create-contest-task"

# Obtain a token via
#   curl -X POST https://modelhub.org.cn/adminApi/user/login
# and export it as AUTH_TOKEN. CONTEST_API_TOKEN must be exported as well.
# NOTE: any token that was previously committed to this file should be
# treated as compromised and rotated.
AUTH_TOKEN = os.environ.get("AUTH_TOKEN", "")
CONTEST_API_TOKEN = os.environ.get("CONTEST_API_TOKEN", "")

CONTRIBUTORS = "zhoushasha"
GPU_TYPE = "Cambricon_mlu-370-x8"
TASK_TYPE = "text-generation"
STRATEGY_ID = os.environ.get("STRATEGY_ID", "")  # injected by the platform; do not modify

HTTP_HOST = "0.0.0.0"
HTTP_PORT = 8080

# Per-request timeout (seconds) for the submit call.
REQUEST_TIMEOUT_SECONDS = 15
# File that receives one "<task_id>\t<model_id>" line per accepted task.
RESULT_FILE = "submitted_validation_tasks.txt"

# ══════════════════════════════════════════════════════════
# Model list
# ══════════════════════════════════════════════════════════

ALL_MODEL_IDS = [
    "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3",
    "migtissera/SynthIA-7B-v1.3",
    "TinyLlama/TinyLlama-1.1B-intermediate-step-955k-token-2T",
    "bigscience/bloomz-1b1",
    "EleutherAI/pythia-6.9b-deduped",
    "AvitoTech/avibe",
    "Enoch/llama-7b-hf",
    "asingh15/qwen-abs-verl-sft-rephrased-lr5e6-ep1-0109",
    "PrimeIntellect/INTELLECT-1",
    "neuralmagic/starcoder2-3b-quantized.w8a8",
    "Saxo/Linkbricks-Horizon-AI-Korean-Gemma-2-sft-dpo-27B",
    "HuggingFaceH4/zephyr-7b-gemma-v0.1",
    "neuralmagic/Llama-2-7b-chat-quantized.w4a16",
    "neuralmagic/starcoder2-15b-quantized.w8a8",
    "DAMO-NLP-SG/Qwen2.5-7B-LongPO-128K",
    "guardrail/llama-2-7b-guanaco-instruct-sharded",
    "shenzhi-wang/Gemma-2-27B-Chinese-Chat",
    "pavankumarbalijepalli/phi2-sqlcoder",
    "neph1/bellman-7b-mistral-instruct",
    "neuralmagic/Meta-Llama-3-8B-Instruct-quantized.w8a16",
    "neuralmagic/Qwen2-7B-Instruct-quantized.w8a8",
    "lamm-mit/BioinspiredLLM",
    "neuralmagic/Qwen2-7B-Instruct-quantized.w8a16",
    "dataopsnick/Qwen3-4B-Instruct-2507-zip-rc",
    "huihui-ai/MicroThinker-3B-Preview",
    "OrionStarAI/Orion-14B-Base",
    "georgesung/llama3_8b_chat_uncensored",
    "FreedomIntelligence/RAG-Instruct-Llama3-3B",
    "Aryanne/WestSenzu-Swap-7B",
    "Josephgflowers/Cinder-Phi-2-Test-1",
    "FreedomIntelligence/Apollo-6B",
    "Josephgflowers/Tinyllama-1.3B-Cinder-Reason-Test-2",
    "Josephgflowers/Tinyllama-1.3B-Cinder-Reason-Test",
    "247labs/Llama-2-7b-Verse-Bot",
    "praneethposina/customer_support_bot",
    "KBlueLeaf/TIPO-200M",
    "norallm/normistral-11b-warm",
    "theprint/Boptruth-Agatha-7B",
    "ericflo/Llama-3.1-8B-ContinuedTraining2-FFT",
    "okwinds/OpenR1-Qwen-7B",
    "ruohuaw/deepquery-3b-sft",
    "theprint/Boptruth-NeuralMonarch-7B",
    "MaziyarPanahi/calme-3.1-qwenloi-3b",
    "alperiox/trendyol-7b-base-v1-mtLoRA_entr",
    "theprint/phi-3-mini-4k-python",
    "uukuguy/speechless-nl2sql-ds-6.7b",
    "uukuguy/speechless-coder-ds-6.7b",
    "tybrs/llama-guard-quant",
    "Josephgflowers/TinyLlama-3T-Cinder-v1.3",
    "mlabonne/Darewin-7B-v2",
    "TeichAI/Qwen3-1.7B-Gemini-2.5-Flash-Lite-Preview-Distill",
    "TeichAI/Nemotron-Orchestrator-8B-DeepSeek-v3.2-Speciale-Distill",
    "shadowml/BeagSake-7B",
    "lex-hue/Delexa-7b",
    "h2oai/h2o-danube3-500m-chat",
    "bigcode/gpt_bigcode-santacoder",
    "openlm-research/open_llama_7b",
    "upstage/SOLAR-10.7B-v1.0",
    "prithivMLmods/Phi-3.5-Mini-Xalate",
    "prithivMLmods/Qwen3-Bifrost-SOL-4B-GUFF",
    "prithivMLmods/Volans-Opus-14B-Exp",
    "prithivMLmods/Viper-OneCoder-UIGEN",
    "prithivMLmods/Tucana-Opus-14B-r999",
    "prithivMLmods/Sombrero-Opus-14B-Sm5",
    "prithivMLmods/Sombrero-Opus-14B-Sm4",
    "prithivMLmods/Reasoning-SmolLM2-135M",
    "prithivMLmods/Sombrero-Opus-14B-Sm1",
    "prithivMLmods/LwQ-10B-Instruct",
    "prithivMLmods/Sombrero-Opus-14B-Elite5",
    "prithivMLmods/Eridanus-Opus-14B-r999",
    "prithivMLmods/Equuleus-Opus-14B-Exp",
    "prithivMLmods/Epimetheus-14B-Axo",
    "prithivMLmods/Phi-4-Math-IO",
    "prithivMLmods/Omni-Reasoner4-Merged",
    "prithivMLmods/Pegasus-Opus-14B-Exp",
    "prithivMLmods/Elita-1",
    "prithivMLmods/Delta-Pavonis-Qwen-14B",
    "prithivMLmods/Nu2-Lupi-Qwen-14B",
    "prithivMLmods/Coma-II-14B",
    "MaziyarPanahi/calme-2.7-qwen2-7b",
    "prithivMLmods/Monocerotis-V838-14B",
    "prithivMLmods/Calcium-Opus-14B-Merge",
    "prithivMLmods/Calcium-Opus-14B-Elite3",
    "prithivMLmods/Calcium-Opus-14B-Elite2-R1",
    "prithivMLmods/Calcium-Opus-14B-Elite2",
    "prithivMLmods/Calcium-Opus-14B-Elite-Stock",
    "prithivMLmods/Megatron-Opus-14B-2.1",
    "prithivMLmods/Blaze.1-27B-Reflection",
    "prithivMLmods/Megatron-Corpus-14B-Exp.v2",
    "prithivMLmods/Megatron-Corpus-14B-Exp",
    "GAIR/autoj-bilingual-6b",
    "TheBloke/airoboros-7b-gpt4-fp16",
    "Undi95/Mistral-11B-OmniMix9",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C016-pretrain-v0.2",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C017-instruct-v0.2",
    "mlabonne/NeuralDarewin-7B",
    "0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther",
    "openlm-research/open_llama_3b_v2",
    "Nobitaxi/InternLM2-chat-7B-SQL",
    "testUser/Qwen3-1.7b-Medical-R1-sft",
    "mlabonne/Zebrafish-7B",
    "mlabonne/NeuralPipe-7B-slerp",
    "laion/openthoughts-4-code-qwen3-32b-annotated-7k_qwen3-1.7B_10k",
    "Fengshenbang/Ziya-LLaMA-13B-v1.1",
    "arcee-ai/Saul-Instruct-Mistral-7B-Instruct-v0.2-Slerp",
    "arcee-ai/Saul-Instruct-Clown-7b",
    "prithivMLmods/Megatron-Opus-7B-Exp",
    "Vikhrmodels/QVikhr-3-8B-Instruction",
    "TheBloke/Nous-Hermes-13B-SuperHOT-8K-fp16",
    "TheBloke/UltraLM-13B-fp16",
    "PocketDoc/Dans-TotSirocco-7b",
    "LLM-Research/Meta-Llama-3.1-8B",
    "Qwen/Qwen2.5-Coder-32B",
    "Qwen/Qwen2.5-7B-Instruct-1M",
    "Qwen/Qwen2-57B-A14B-Instruct",
    "Qwen/Qwen2.5-14B-Instruct-1M",
    "Qwen/Qwen-1_8B-Chat",
    "Qwen/Qwen1.5-MoE-A2.7B-Chat",
    "Qwen/Qwen1.5-MoE-A2.7B",
    "Qwen/Qwen1.5-14B-Chat",
    "Qwen/Qwen1.5-14B",
    "Qwen/Qwen-14B",
    "deepseek-ai/DeepSeek-Coder-V2-Lite-Base",
    "TheBloke/tulu-13B-fp16",
    "TheBloke/Kimiko-Mistral-7B-fp16",
    "TheBloke/Llama-2-13B-fp16",
    "mlabonne/Monarch-7B",
    "TheBloke/tulu-7B-fp16",
    "01ai/Yi-9B",
    "TheBloke/koala-7B-HF",
    "AI-ModelScope/txgemma-2b-predict",
    "LLM-Research/OLMo-7B-0724-SFT-hf",
    "JsonZhang02/Llama3.2-1B-PCL",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C019-instruct-v0.2",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C016-instruct-v0.2",
    "FreedomIntelligence/AceGPT-v1.5-13B-Chat",
    "MediaTek-Research/Breeze-7B-Base-v0_1",
    "OpenBuddy/openbuddy-llama3-8b-v21.1-8k",
    "HIT-TMG/Mixtral_13B_Chat_RAG-Reader",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C014-pretrain-v0.2",
    "arcee-ai/arcee-lite",
    "X-D-Lab/MindChat-Qwen2-4B",
    "mlabonne/NeuralMonarch-7B",
    "ibm-granite/granite-3b-code-instruct-2k",
    "LLM-Research/OLMo-7B-Twin-2T-hf",
    "PocketDoc/Dans-AdventurousWinds-Mk2-7b",
    "LLM-Research/Qwen2-Math-7B",
    "MediaTek-Research/Breeze-7B-Base-v1_0",
    "LLM-Research/layerskip-llama2-13B",
    "prithivMLmods/TESS-QwenRe-1.5B",
    "prithivMLmods/Octantis-QwenR1-1.5B",
    "prithivMLmods/Qwen3-1.7B-ft-bf16",
    "prithivMLmods/Theta-Crucis-0.6B-Turbo1",
    "prithivMLmods/Omega-Qwen3-Atom-8B",
    "prithivMLmods/Mintaka-Qwen3-1.6B-V3.1",
    "NousResearch/Yarn-Llama-2-7b-64k",
    "prithivMLmods/Panacea-MegaScience-Qwen3-1.7B",
    "prithivMLmods/TOI-157-Phi-4-Reasoning-Mini",
    "prithivMLmods/Vulpecula-4B",
    "LLM-Research/OLMo-7B-0424-hf",
    "LLM-Research/OLMo-7B-hf",
    "LLM-Research/OLMo-7B-SFT-hf",
    "AI-ModelScope/starcoder2-7b",
    "LLM-Research/OLMo-7B-0724-hf",
    "OpenBMB/BitCPM4-1B",
    "LLM-Research/truthfulqa-truth-judge-llama2-7B",
    "LLM-Research/OLMo-1B-0724-hf",
    "HIT-TMG/Qwen1.5-14B-Chat_RAG-Reader",
    "OpenBMB/MiniCPM4-MCP",
    "AI-ModelScope/sqlcoder-7b-2",
    "FuseAI/OpenChat-3.5-7B-SOLAR-v2.0",
    "JsonZhang02/Llama3.2-1B-SFT",
    "MaziyarPanahi/neural-chat-7b-v3-2-Mistral-7B-Instruct-v0.1",
    "MaziyarPanahi/SauerkrautLM-7b-HerO-Mistral-7B-Instruct-v0.1",
    "prithivMLmods/Segue-Qwen3_DeepScaleR-Preview",
    "NovaSky-AI/Sky-T1-7B-Zero",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C020-pretrain-v0.2",
    "NovaSky-AI/Sky-T1-7B-step2",
    "NousResearch/CodeLlama-7b-hf-flash",
    "LLM-Research/layerskip-llama3-8B",
    "LLM-Research/OLMo-1B-hf",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C018-pretrain-v0.2",
    "NousResearch/CodeLlama-7b-Instruct-hf-flash",
    "Nexusflow/NexusRaven-V2-13B",
    "AI-ModelScope/NuExtract-v1.5",
    "NousResearch/Nous-Capybara-3B-V1.9",
    "NousResearch/Nous-Capybara-7B-V1",
    "NousResearch/Yarn-Solar-10b-32k",
    "LLM-Research/Llama-Guard-4-12B",
    "OpenPipe/gemma-3-4b-it-text-only-2",
    "OpenPipe/Deductive-Reasoning-Qwen-14B",
    "OpenPipe/gemma-3-12b-it-text-only",
    "AI-MO/NuminaMath-7B-CoT",
    "GAIR/Abel-7B-001",
    "prithivMLmods/Novaeus-Promptist-7B-Instruct",
    "SakanaAI/EvoLLM-JP-v1-7B",
    "FreedomIntelligence/Apollo-1.8B",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C013-instruct-v0.2",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C013-pretrain-v0.2",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C015-pretrain-v0.2",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C014-instruct-v0.2",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C021-instruct-v0.2",
    "NousResearch/Meta-Llama-3.1-8B",
    "OpenPipe/Qwen3-14B-Instruct",
    "unsloth/OpenHermes-2.5-Mistral-7B",
    "OpenBuddy/openbuddy-mistral-22b-v21.1-32k",
    "FlyDutch/telechat2-7b-Cot",
    "HuggingFaceH4/mistral-7b-sft-alpha",
    "PAI/pai-qwen1_5-7b-doc2qa",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C018-instruct-v0.2",
    "Magpie-Align/Llama-3-8B-Tulu-330K",
    "prithivMLmods/Blaze.1-27B-Preview",
    "allenai/OLMo-7B-0424-SFT-hf",
    "mlabonne/Meta-Llama-3-8B",
    "LLM-Research/layerskip-codellama-7B",
    "prithivMLmods/Sculptor-Qwen3_Med-Reasoning",
    "prithivMLmods/SmolLM2-360M-Grpo-r999",
    "prithivMLmods/SmolLM2-1.7B-Open-Thought",
    "LLM-Research/open-instruct-llama2-sharegpt-7b",
    "prithivMLmods/SmolLM2_135M_Grpo_Checkpoint",
    "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.1-200k",
    "OpenBuddy/openbuddy-zero-3b-v21.2-32k",
    "YeungNLP/firefly-llama2-7b-chat",
    "OpenBuddy/openbuddy-zero-14b-v22.3-32k",
    "OpenBuddy/openbuddy-yi1.5-9b-v21.1-32k",
    "FuseAI/OpenChat-3.5-7B-Starling-v2.0",
    "prithivMLmods/Qwen-7B-Distill-Reasoner",
    "FuseAI/OpenChat-3.5-7B-InternLM-v2.0",
    "prithivMLmods/Galactic-Qwen-14B-Exp1",
    "prithivMLmods/Sombrero-R1-14B-Elite13",
    "prithivMLmods/Sombrero-Opus-14B-Elite13",
    "TheBloke/Planner-7B-fp16",
    "AI-ModelScope/speed-synthesis-8b-senior",
    "PocketDoc/Dans-AdventurousWinds-7b",
    "MaziyarPanahi/calme-3.2-baguette-3b",
    "MaziyarPanahi/calme-3.2-instruct-3b",
    "IntervitensInc/intv_ai_mk11",
    "prithivMLmods/Muscae-Qwen3-UI-Code-4B",
    "NousResearch/Llama-2-7b-hf",
    "prithivMLmods/Pocket-Llama-3.2-3B-Instruct",
    "OpenBuddy/openbuddy-openllama-13b-v7-fp16",
    "LLM-Research/WildLlama-7b-assistant-only",
    "prithivMLmods/Raptor-X2",
    "OpenBuddy/openbuddy-qwen1.5-14b-v20.1-32k",
    "NaniDAO/Meta-Llama-3.1-8B-Instruct-ablated-v1",
    "LLM-Research/OLMo-7B-Instruct-hf",
    "OpenBuddy/openbuddy-zen-3b-v21.2-32k",
    "OpenBuddy/openbuddy-qwen1.5-14b-v21.1-32k",
    "LLM-Research/llama2-7b-WildJailbreak",
    "JunHowie/MiniCPM4-8B",
    "OpenBuddy/openbuddy-coder-15b-v10-bf16",
    "JunHowie/MiniCPM4-0.5B",
    "OpenDevin/CodeQwen1.5-7B-OpenDevin",
    "OpenBuddy/openbuddy-mistral-10b-v17.1-32k",
    "PAI/DistilQwen2.5-DS3-0324-7B",
    "OpenBuddy/openbuddy-llama2-13b64k-v15",
    "OpenBuddy/openbuddy-falcon-7b-v5-fp16",
    "NousResearch/Hermes-2-Theta-Llama-3-8B",
    "NousResearch/Hermes-2-Pro-Mistral-7B",
    "OpenBuddy/openbuddy-openllama-7b-v5-fp16",
    "PAI/DistillQwen-ThoughtY-8B",
    "BSC-LT/salamandra-2b",
    "pfnet/nekomata-7b-pfn-qfin-inst-merge",
    "BSC-LT/experimental7b-rag-instruct",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C019-pretrain-v0.2",
    "OpenBuddy/OpenBuddy-R10528DistillQwen-14B-v27.4-200K",
    "OpenBuddy/OpenBuddy-R10528DistillQwen-14B-v27.1",
    "OpenBuddy/SimpleChat-4B-V1",
    "AI-ModelScope/granite-8b-code-base-4k",
    "mlabonne/NeuralHermes-2.5-Mistral-7B",
    "BSC-LT/experimental7b-rag",
    "prithivMLmods/SmolLM2_135M_Grpo_Gsm8k",
    "OpenBuddy/openbuddy-zen-3b-v21.1-32k",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C015-instruct-v0.2",
    "prithivMLmods/QwQ-LCoT1-Merged",
    "mlabonne/NeuralBeagle14-7B",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C020-instruct-v0.2",
    "mlabonne/NeuralMarcoro14-7B",
    "PKU-Alignment/ProgressGym-HistLlama3-8B-C017-pretrain-v0.2",
    "FuseAI/OpenChat-3.5-7B-Mixtral-v2.0",
    "mlabonne/FrankenMonarch-7B",
    "stabilityai/stablelm-tuned-alpha-3b",
    "prithivMLmods/Viper-Coder-v1.5-r999",
    "prithivMLmods/Galactic-Qwen-14B-Exp2",
    "HuggingFaceTB/cosmo-1b",
    "LLM-Research/WildLlama-7b-user-assistant",
    "OpenBuddy/openbuddy-llama2-13b-v8.1-fp16",
    "prithivMLmods/Regulus-Qwen3-R1-Llama-Distill-1.7B",
    "LLM-Research/OLMo-7B-0424-SFT-hf",
    "huihui-ai/MicroThinker-1B-Preview",
    "OpenBuddy/openbuddy-openllama-3b-v10-bf16",
    "LLM-Research/digital-socrates-13b",
    "prithivMLmods/Viper-Coder-v1.6-r999",
    "prithivMLmods/Magpie-Qwen-DiMind-1.7B",
    "BAAI/CareBot_Medical_multi-llama3-8b-base",
    "NousResearch/Meta-Llama-3-8B",
    "OpenBuddy/openbuddy-llama2-13b-v15p1-64k",
    "NousResearch/Yarn-Mistral-7b-64k",
    "PrimeIntellect/DeepSeek-R1-Distill-Qwen-1.5B",
    "Undi95/Meta-Llama-3-8B-Instruct-hf",
    "FuseAI/OpenChat-3.5-7B-Mixtral",
    "prithivMLmods/Viper-Coder-Hybrid-v1.3",
    "OpenBuddy/openbuddy-qwen2.5llamaify-7b-v23.1-200k",
    "LLM-Research/mistral-7b",
    "OpenBuddy/openbuddy-qwen2.5llamaify-14b-v23.3-200k",
    "prithivMLmods/Viper-Coder-HybridMini-v1.3",
    "OpenBuddy/openbuddy-atom-13b-v9-bf16",
    "OpenBuddy/openbuddy-llama3.2-3b-v23.2-131k",
    "ibm-granite/granite-3b-code-instruct-128k",
    "PierreZCW/Breeze-7B-Instruct-v1_0",
    "mlabonne/Marcoro14-7B-slerp",
    "AI-ModelScope/openbuddy-falcon-7b-v15-fp16",
    "AI-ModelScope/falcon-7b",
    "BAAI/AquilaChat2-7B",
    "PrimeIntellect/Qwen3-0.6B",
    "OuteAI/Lite-Oute-1-65M-Instruct",
    "AI-ModelScope/granite-3b-code-instruct-128k",
    "PrimeIntellect/Qwen3-8B",
    "OpenBuddy/openbuddy-falcon-7b-v6-bf16",
    "MaziyarPanahi/calme-3.1-instruct-3b",
    "LLM-Research/open-instruct-llama2-sharegpt-dpo-7b",
    "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b",
    "FuseAI/FuseChat-Llama-3.1-8B-Instruct",
    "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k",
    "OpenBuddy/openbuddy-deepseekcoder-6b-v16.1-32k",
    "HuggingFaceTB/SmolLM-1.7B",
    "LLM-Research/Llama-4-Scout-17B-16E-Instruct",
    "argilla/distilabeled-Marcoro14-7B-slerp-full",
    "HuggingFaceTB/SmolLM2-1.7B",
    "argilla/distilabeled-Marcoro14-7B-slerp",
    "l3utterfly/open-llama-3b-v2-layla",
]

# ══════════════════════════════════════════════════════════
# Global state (exposed via /status)
# ══════════════════════════════════════════════════════════

_state = {
    "strategy_id": STRATEGY_ID,
    "phase": "starting",  # starting | submitting | done | error
    "total": len(ALL_MODEL_IDS),
    "submitted": 0,
    "failed": 0,
    "started_at": None,
    "finished_at": None,
}
_shutdown = threading.Event()


def _utc_now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


# ══════════════════════════════════════════════════════════
# HTTP service
# ══════════════════════════════════════════════════════════

class Handler(BaseHTTPRequestHandler):
    """Serve the /health and /status JSON endpoints."""

    def do_GET(self):
        """Route GET requests; unknown paths get a JSON 404."""
        # Drop any query string so e.g. "/health?probe=1" still matches.
        route = self.path.split("?", 1)[0]
        if route == "/health":
            self._json({"status": "ok"})
        elif route == "/status":
            self._json(_state)
        else:
            self._json({"error": "not found"}, 404)

    def _json(self, body: dict, code: int = 200):
        """Send *body* as a JSON response with HTTP status *code*."""
        payload = json.dumps(body, default=str).encode()
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def log_message(self, fmt, *args):
        """Write the access log line to stdout instead of stderr."""
        print(f"[http] {self.address_string()} {fmt % args}", flush=True)


def _run_http():
    """Serve HTTP requests until the shutdown event is set."""
    # The context manager closes the listening socket even if the loop raises.
    with ThreadingHTTPServer((HTTP_HOST, HTTP_PORT), Handler) as server:
        # handle_request() returns after at most this many seconds, so the
        # loop re-checks the shutdown flag about once per second.
        server.timeout = 1
        print(f"[http] 监听 {HTTP_HOST}:{HTTP_PORT}", flush=True)
        while not _shutdown.is_set():
            server.handle_request()
    print("[http] 已关闭", flush=True)


# ══════════════════════════════════════════════════════════
# Business logic
# ══════════════════════════════════════════════════════════

def _build_config(model_id: str) -> str:
    """Return the YAML submission config for *model_id*."""
    # NOTE(review): the YAML nesting below (values/env/command under each
    # *_config, srcRelativePath/mountPoint under modelhub_options) is assumed;
    # confirm it against the platform's config schema.
    return f"""docker_image: harbor.4pd.io/hardcore-tech/cambricon-mlu370-pytorch:v25.01-torch2.5.0-torchmlu1.24.1-ubuntu22.04-py310
nv_docker_image: harbor.4pd.io/dooke/vllm/vllm/vllm-openai:v0.11.0
framework: vllm
storage: gpfs
modelhub_options:
  srcRelativePath: leaderboard/modelHubXC/{model_id}
  mountPoint: /model
sut_config:
  values:
    gpu_num: 1
    env:
      - name: MAX_MODEL_LEN
        value: 8192
    command: ["vllm", "serve", "/model", "--port", "8000", "--served-model-name", "llm", "--max-model-len", "8192", "--trust-remote-code", "--dtype", "float16"]
ref_config:
  values:
    cpu_num: 2
    gpu_num: 1
    env:
      - name: MAX_MODEL_LEN
        value: 8192
    command: ["vllm", "serve", "/model", "--port", "80", "--served-model-name", "llm", "--max-model-len", "8192", "--trust-remote-code", "--dtype", "float16"]
"""


def _build_payload(model_id: str) -> dict:
    """Return the JSON request body that submits *model_id* for validation."""
    return {
        "contestApiToken": CONTEST_API_TOKEN,
        "contributors": CONTRIBUTORS,
        "gpuTypes": [GPU_TYPE],
        "taskType": TASK_TYPE,
        "modelId": model_id,
        "framework": "vllm",
        "strategyId": STRATEGY_ID,  # required by the platform
        "submissionConfig": [{
            "config": _build_config(model_id),
            "gpuType": GPU_TYPE,
            "taskType": TASK_TYPE,
        }],
    }


def _submit_task(token: str, model_id: str) -> Tuple[bool, str]:
    """Submit one validation task.

    Args:
        token: Bearer token sent in the Authorization header.
        model_id: Model identifier to validate.

    Returns:
        ``(True, task_id)`` when the response body has ``code == 0``;
        ``(False, "")`` on a transport error, a non-JSON or malformed
        response, or a non-zero ``code``. ``task_id`` is the string form of
        ``data.id`` from the response, or ``""`` when that field is absent.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }
    payload = _build_payload(model_id)

    # Log a copy with the contest token masked so the secret never reaches
    # the log stream.
    loggable = {**payload, "contestApiToken": "***"}
    print(
        f"[payload] {json.dumps(loggable, indent=2, ensure_ascii=False)}",
        flush=True,
    )

    try:
        resp = requests.post(
            BASE_URL + SUBMIT_ENDPOINT,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        print(f"[worker] ERROR {model_id}: {exc}", flush=True)
        return False, ""

    try:
        result = resp.json()
    except ValueError as exc:
        # Typically an HTML error page from a proxy; include the HTTP status.
        print(
            f"[worker] ERROR {model_id}: non-JSON response "
            f"(HTTP {resp.status_code}): {exc}",
            flush=True,
        )
        return False, ""

    if not isinstance(result, dict):
        print(
            f"[worker] FAIL {model_id}: unexpected response shape "
            f"(HTTP {resp.status_code})",
            flush=True,
        )
        return False, ""

    if result.get("code") != 0:
        print(f"[worker] FAIL {model_id}: {result.get('message')}", flush=True)
        return False, ""

    # "data" may be missing or null even on success; treat that as "no id".
    data = result.get("data")
    task_id = str(data.get("id", "")) if isinstance(data, dict) else ""
    print(f"[worker] OK {model_id} task_id={task_id}", flush=True)
    return True, task_id


def _write_results(successful: List[Tuple[str, str]]) -> None:
    """Write one ``task_id<TAB>model_id`` line per accepted task (best effort)."""
    try:
        with open(RESULT_FILE, "w", encoding="utf-8") as f:
            f.writelines(f"{tid}\t{mid}\n" for tid, mid in successful)
    except OSError as exc:
        # Losing the file must not fail the run, but it must be visible.
        print(f"[worker] WARN could not write {RESULT_FILE}: {exc}", flush=True)


def _run_worker():
    """Submit every model in ALL_MODEL_IDS once and record progress in _state.

    Sets ``_state["phase"]`` to "done" when the loop completes (or is cut
    short by shutdown) and to "error" when credentials are missing or an
    unexpected exception aborts the batch.
    """
    _state["started_at"] = _utc_now_iso()
    _state["phase"] = "submitting"

    if not AUTH_TOKEN or not CONTEST_API_TOKEN:
        print(
            "[worker] ERROR AUTH_TOKEN and CONTEST_API_TOKEN must be set "
            "in the environment",
            flush=True,
        )
        _state["finished_at"] = _utc_now_iso()
        _state["phase"] = "error"
        return

    successful: List[Tuple[str, str]] = []
    token = AUTH_TOKEN
    print("[worker] 使用预设 Token,跳过登录", flush=True)

    final_phase = "done"
    try:
        for model_id in ALL_MODEL_IDS:
            if _shutdown.is_set():
                break
            ok, task_id = _submit_task(token, model_id)
            if ok:
                _state["submitted"] += 1
                successful.append((task_id, model_id))
            else:
                _state["failed"] += 1
    except Exception as exc:  # top-level boundary of the worker thread
        final_phase = "error"
        print(f"[worker] ERROR unexpected failure: {exc!r}", flush=True)

    # Persist whatever was accepted, even after an error or early shutdown.
    _write_results(successful)

    _state["finished_at"] = _utc_now_iso()
    _state["phase"] = final_phase
    print(
        f"[worker] 完成 submitted={_state['submitted']} failed={_state['failed']}",
        flush=True,
    )
    # The process stays alive after submission until the platform stops it.


# ══════════════════════════════════════════════════════════
# Entry point
# ══════════════════════════════════════════════════════════

def _handle_signal(signum, _frame):
    """Signal handler: request a graceful shutdown."""
    print(f"[main] 收到信号 {signum},正在关闭...", flush=True)
    _shutdown.set()


def main():
    """Start the HTTP and worker threads, then block until shutdown."""
    signal.signal(signal.SIGTERM, _handle_signal)
    signal.signal(signal.SIGINT, _handle_signal)

    # HTTP service thread
    http_thread = threading.Thread(target=_run_http, daemon=False)
    http_thread.start()

    # Task-submission thread
    worker_thread = threading.Thread(target=_run_worker, daemon=True)
    worker_thread.start()

    # Main thread waits for shutdown
    _shutdown.wait()

    # Give the worker time to finish its in-flight request and write the
    # results file; it is a daemon thread and would otherwise be killed
    # mid-write when the interpreter exits.
    worker_thread.join(timeout=REQUEST_TIMEOUT_SECONDS + 5)

    print("[main] 等待 HTTP 服务关闭...", flush=True)
    http_thread.join(timeout=5)
    print("[main] 退出", flush=True)


if __name__ == "__main__":
    main()