From 112a650ba1c9823456e253ed22fb6bbefb20b30a Mon Sep 17 00:00:00 2001 From: qiliguo Date: Wed, 3 Dec 2025 12:07:49 +0800 Subject: [PATCH 1/2] update model list --- README.md | 188 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 163 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index d4e4429..e772bbc 100644 --- a/README.md +++ b/README.md @@ -50,28 +50,166 @@ ``` 模型测试结果 -| 模型名称 | A100出字速度(字/秒) | 沐曦卡出字速度(字/秒) | 备注 | -|---------|-----|-----|---------------------| -| unsloth/gpt-oss-20b-BF16 | 80.1 | 52.9 | | -| Qwen/Qwen3-4B | 171.8 | 112.3 | | -| Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8 | 168.5 | 135.3 | | -| Qwen/Qwen-1_8B-Chat-Int4 | 536.4 | 192.9 | | -| Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4 | 153.1 | 132.4 | | -| deepseek-ai/deepseek-moe-16b-chat | 68.1 | 60.7 | | -| Qwen/Qwen2-7B-Instruct-GPTQ-Int4 | 129.2 | 127.5 | | -| Qwen2.5-7B-Instruct-GPTQ-Int4 | 118.1 | 133.1 | | -| tclf90/glm-4-9b-chat-GPTQ-Int4 | 92.2 | 97.7 | | -| Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4 | 79.6 | 72.3 | | -| Qwen/Qwen-14B-Chat-Int8 | 103.1 | 56.4 | 该模型在沐曦卡上生成质量要差于A100 | -| Qwen/Qwen2.5-14B-Instruct-GPTQ-Int8 | 81.4 | 70.4 | | -| tclf90/qwq-32b-gptq-int4 | 60.3 | 53.5 | | -| Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4 | 60.1 | 54.6 | | -| Qwen/Qwen1.5-32B-Chat-GPTQ-Int4 | 58.2 | 51.3 | | -| tclf90/Qwen3-32B-GPTQ-Int8 | 54.0 | 41.3 | | -| tclf90/deepseek-r1-distill-qwen-32b-gptq-int8 | 59.9 | 45.8 | | -| Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4 | 46.6|29.5 | | -| Qwen/Qwen2-72B-Instruct-GPTQ-Int4 | 48.2| 29.7| | -| Qwen/Qwen3-4B-Instruct-2507 | 65.4| 71.8| | -| Qwen/Qwen3-4B-Thinking-2507 |73.4 |52.6 | | -| tclf90/Qwen3-32B-GPTQ-Int4 | 54.4| 38.4 | | -| Qwen/Qwen3-0.6B-GPTQ-Int8 |117.4 | 95.0 | | \ No newline at end of file + +| 模型名称 | A100出字速度(字/秒) | 沐曦卡出字速度(字/秒) | A100输出质量 | 输出质量 | A100首字延迟(秒) | 首字延迟(秒) | 备注 | +| ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | +| 01ai/Yi-1.5-6B-Chat | 109.5427 | 109.5463 | 85.0000 | 80.0000 | 0.0921 | 0.1178 | | +| 
01ai/Yi-6B-Chat | 113.3089 | 83.3881 | 85.0000 | 85.0000 | 0.0663 | 0.0986 | | +| AI-ModelScope/CausalLM-7B | 108.3807 | 86.3852 | 77.5000 | 85.0000 | 0.0798 | 0.1090 | | +| AI-ModelScope/granite-3.1-3b-a800m-instruct | 39.7904 | 41.1946 | 53.7500 | 63.7500 | 0.2164 | 0.2640 | | +| AI-ModelScope/granite-7b-instruct | 106.5448 | 92.1454 | 32.5000 | 27.5000 | 0.1004 | 0.1307 | | +| AI-ModelScope/Hermes-3-Llama-3.1-8B | 104.0769 | 69.4578 | 85.0000 | 85.0000 | 0.0827 | 0.1063 | | +| AI-ModelScope/mathstral-7B-v0.1 | 81.6464 | 56.6757 | 41.2500 | 47.5000 | 0.1011 | 0.1811 | | +| AI-ModelScope/Ministral-8B-Instruct-2410 | 73.7056 | 66.5971 | 85.0000 | 71.2500 | 0.0997 | 0.1629 | | +| AI-ModelScope/Mistral-7B-v0.1 | 74.3966 | 51.7199 | 12.5000 | 15.0000 | 0.1456 | 0.1329 | | +| AI-ModelScope/TinyLlama-1.1B-Chat-v0.4 | 58.0479 | 102.3987 | 22.5000 | 22.5000 | 0.0747 | 0.1156 | | +| allenai/OLMoE-1B-7B-0924-Instruct | 66.9544 | 59.7696 | 37.5000 | 42.5000 | 0.2127 | 0.2610 | | +| BAAI/Finance-llama3_1_8B_instruct | 95.4692 | 65.3383 | 58.7500 | 57.5000 | 0.0812 | 0.1262 | | +| BAAI/Hospitality-llama3_1_8B_instruct | 91.7153 | 62.6340 | 61.2500 | 56.2500 | 0.0650 | 0.1210 | | +| BAAI/Technology-llama3_1_8B_instruct | 80.6653 | 60.4259 | 32.5000 | 36.2500 | 0.0844 | 0.1147 | | +| ByteDance-Seed/Seed-OSS-36B-Instruct | 38.2767 | 17.4623 | 86.7500 | 88.5000 | 0.1400 | 0.3135 | | +| codefuse-ai/TestGPT-7B | 68.6996 | 49.0781 | 15.0000 | 15.0000 | 0.0756 | 0.1193 | | +| CohereLabs/aya-expanse-8B | 88.7216 | 78.2155 | 86.7500 | 86.7500 | 0.0687 | 3.8936 | | +| Cylingo/Xinyuan-LLM-14B-0428 | 71.7868 | 49.5901 | 86.7500 | 89.2500 | 0.0860 | 0.2501 | | +| deepseek-ai/deepseek-llm-7b-base | 141.4714 | 93.0921 | 20.0000 | 22.5000 | 0.1231 | 0.1760 | | +| deepseek-ai/deepseek-llm-7b-chat | 124.9381 | 99.4562 | 85.0000 | 81.2500 | 0.0581 | 0.5611 | | +| deepseek-ai/deepseek-moe-16b-chat | 68.0789 | 60.7028 | 85.0000 | 68.7500 | 0.2346 | 0.2880 | | +| Fengshenbang/Ziya-LLaMA-13B-v1 | 
56.6471 | 39.3946 | 61.2500 | 58.7500 | 0.0616 | 0.1163 | | +| FlagAlpha/Llama3-Chinese-8B-Instruct | 100.3664 | 70.7829 | 38.7500 | 30.0000 | 0.1091 | 0.1442 | | +| HuggingFaceH4/zephyr-7b-beta | 97.4954 | 51.7445 | 66.2500 | 47.5000 | 0.0802 | 0.1135 | | +| iic/WritingBench-Critic-Model-Qwen-7B | 125.5657 | 96.5148 | 87.5000 | 87.5000 | 0.0593 | 0.1159 | | +| InfiniAI/Megrez-3b-Instruct | 142.4497 | 157.9848 | 85.0000 | 85.0000 | 0.0815 | 0.0946 | | +| JunHowie/Qwen3-0.6B-GPTQ-Int4 | 108.2950 | 67.6596 | 58.7500 | 21.2500 | 0.0711 | 0.1206 | | +| JunHowie/Qwen3-1.7B-GPTQ-Int8 | 124.1477 | 131.5458 | 71.2500 | 38.7500 | 0.0900 | 0.1848 | | +| JunHowie/Qwen3-8B-GPTQ-Int4 | 82.8148 | 109.8259 | 86.7500 | 52.5000 | 0.0800 | 0.1575 | | +| Kedreamix/Xinjing-LM | 137.3682 | 150.4469 | 80.0000 | 66.2500 | 0.0710 | 0.1214 | | +| LLM-Research/gemma-2-9b-it | 47.0337 | 47.1786 | 85.0000 | 66.2500 | 0.1125 | 0.2271 | | +| LLM-Research/gemma-3-1b-it | 39.6315 | 43.8421 | 47.5000 | 71.2500 | 0.2256 | 0.2829 | | +| LLM-Research/gemma-3-1b-it | 38.3524 | 33.8285 | 47.5000 | 36.2500 | 0.2080 | 0.1673 | | +| LLM-Research/Llama-3.2-3B | 137.6931 | 145.1816 | 15.0000 | 15.0000 | 0.0891 | 0.1399 | | +| LLM-Research/Llama-3.2-3B-Instruct | 107.1550 | 95.4333 | 63.7500 | 68.7500 | 0.0533 | 0.1868 | | +| LLM-Research/Llama-Guard-3-8B | 74.5048 | 73.5737 | 33.7500 | 22.5000 | 0.0861 | 0.1248 | | +| LLM-Research/Llama3-8B-Chinese-Chat | 69.0446 | 66.6263 | 85.0000 | 85.0000 | 0.0823 | 0.3008 | | +| LLM-Research/Meta-Llama-3-8B-Instruct | 167.5121 | 27.8093 | 85.0000 | 92.7500 | 0.0778 | 0.2426 | | +| LLM-Research/Meta-Llama-3-8B-Instruct-GPTQ | 199.0842 | 175.9663 | 57.5000 | 71.2500 | 0.1028 | 0.1520 | | +| LLM-Research/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 | 34.9493 | 18.1700 | 87.5000 | 85.0000 | 0.1327 | 0.2035 | | +| LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4 | 93.1869 | 77.1093 | 85.0000 | 85.0000 | 0.0665 | 0.1753 | | +| LLM-Research/OpenHermes-2.5-Mistral-7B | 78.2992 | 
51.0257 | 75.0000 | 75.0000 | 0.0797 | 0.1161 | | +| LLM-Research/Phi-3-mini-4k-instruct | 65.0090 | 59.8164 | 45.0000 | 40.0000 | 0.0632 | 0.1004 | | +| LLM-Research/Qwen2-7B | 170.2887 | 126.6331 | 77.5000 | 78.2500 | 0.0787 | 0.1345 | | +| LLM-Research/Starling-LM-7B-beta | 73.7987 | 49.5925 | 66.2500 | 71.2500 | 0.0952 | 0.5098 | | +| LLM-Research/tulu-2-dpo-7b | 60.8343 | 47.6062 | 70.0000 | 72.5000 | 0.0618 | 0.0997 | | +| m-a-p/neo_7b_instruct_v0.1 | 178.2949 | 137.2681 | 71.2500 | 72.5000 | 0.0839 | 0.1358 | | +| mistralai/Devstral-Small-2507 | 56.7985 | 42.3061 | 88.7500 | 91.0000 | 0.0698 | 0.1193 | | +| mistralai/Mistral-Small-24B-Instruct-2501 | 57.3654 | 42.7280 | 86.7500 | 87.5000 | 0.0899 | 0.1340 | | +| mlabonne/EvolCodeLlama-7b | 53.7677 | 43.3601 | 5.0000 | 5.0000 | 0.1003 | 0.1287 | | +| modelscope/zephyr-7b-beta | 81.4112 | 52.4249 | 65.0000 | 47.5000 | 0.0835 | 0.2036 | | +| neuralmagic/Meta-Llama-3.1-8B-quantized.w8a8 | 157.2308 | 130.4350 | 10.0000 | 10.0000 | 0.0638 | 0.2007 | | +| neuralmagic/Qwen2-1.5B-Instruct-quantized.w8a8 | 133.1439 | 136.2916 | 80.0000 | 80.0000 | 0.0564 | 0.0941 | | +| neuralmagic/SmolLM-1.7B-Instruct-quantized.w8a8 | 105.3004 | 107.6412 | 10.0000 | 10.0000 | 0.0886 | 0.0876 | | +| nv-community/Minitron-4B-Base | 63.8192 | 69.2841 | 5.0000 | 5.0000 | 0.0706 | 0.0917 | | +| openai-mirror/gpt-oss-safeguard-20b | 112.4384 | 86.9659 | 61.2500 | 72.0000 | 0.2067 | 0.4129 | | +| OpenBMB/MiniCPM-1B-sft-bf16 | 61.4045 | 71.6924 | 66.2500 | 75.0000 | 0.0748 | 0.1010 | | +| OpenBMB/MiniCPM-2B-dpo-fp16 | 61.8623 | 82.9386 | 80.0000 | 71.2500 | 0.1056 | 0.1071 | | +| OpenBMB/MiniCPM-2B-sft-fp32 | 74.4678 | 89.2962 | 65.0000 | 55.0000 | 0.0996 | 0.1408 | | +| OpenBMB/MiniCPM3-4B | 28.2210 | 21.3110 | 86.7500 | 86.7500 | 0.1020 | 0.1905 | | +| OpenBMB/MiniCPM4-0.5B | 82.2793 | 93.0776 | 23.7500 | 15.0000 | 0.0606 | 0.1143 | | +| OpenBMB/MiniCPM4-8B | 60.5837 | 57.0162 | 87.5000 | 52.5000 | 0.0921 | 0.1482 | | +| 
OpenBMB/MiniCPM4-8B-marlin-vLLM | 57.5381 | 58.2351 | 85.0000 | 72.0000 | 0.1033 | 0.1107 | | +| OpenBMB/MiniCPM4.1-8B | 61.4557 | 51.5120 | 89.2500 | 86.7500 | 0.0784 | 0.1290 | | +| PaddlePaddle/ERNIE-4.5-0.3B-PT | 144.5465 | 185.2054 | 52.5000 | 47.5000 | 0.0837 | 0.0902 | | +| prithivMLmods/Llama-Sentient-3.2-3B-Instruct | 147.0239 | 168.6519 | 40.0000 | 40.0000 | 0.0661 | 0.1230 | | +| prithivMLmods/Qwen-UMLS-7B-Instruct | 122.8389 | 89.8736 | 55.0000 | 55.0000 | 0.0584 | 0.1061 | | +| QuantTrio/Qwen3-30B-A3B-Instruct-2507-GPTQ-Int8 | 40.1016 | 22.6581 | 91.7500 | 68.0000 | 0.0915 | 0.2704 | | +| Qwen/Qwen-1_8B-Chat-Int4 | 536.3839 | 192.9054 | 28.7500 | 25.0000 | 0.0517 | 0.0936 | | +| Qwen/Qwen-1_8B-Chat-Int8 | 425.9918 | 95.9929 | 42.5000 | 85.0000 | 0.0549 | 0.1754 | | +| Qwen/Qwen-14B | 64.3123 | 157.3503 | 38.7500 | 33.7500 | 0.0912 | 0.0858 | | +| Qwen/Qwen-14B-Chat-Int4 | 84.9682 | 148.9078 | 68.7500 | 85.0000 | 0.0971 | 0.1298 | | +| Qwen/Qwen-14B-Chat-Int8 | 103.0940 | 56.3855 | 85.0000 | 21.2500 | 0.0758 | 0.1443 | 该模型在沐曦卡上生成质量要差于A100 | +| Qwen/Qwen-72B-Chat-Int4 | 55.1679 | 260.1285 | 85.0000 | 47.5000 | 0.1360 | 0.1411 | | +| Qwen/Qwen-72B-Chat-Int8 | 45.9768 | 83.0130 | 68.0000 | 58.7500 | 0.1172 | 0.1511 | | +| Qwen/Qwen-7B-Chat-Int4 | 119.8944 | 102.6521 | 55.0000 | 55.0000 | 0.0831 | 0.1092 | | +| Qwen/Qwen-7B-Chat-Int4 | 117.8753 | 89.0941 | 55.0000 | 37.5000 | 0.0761 | 0.1086 | | +| Qwen/Qwen-7B-Chat-Int8 | 128.7233 | 107.4733 | 40.0000 | 80.0000 | 0.0545 | 0.1578 | | +| Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8 | 168.5100 | 135.2830 | 33.7500 | 33.7500 | 0.0530 | 0.0869 | | +| Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4 | 153.0596 | 132.3559 | 33.7500 | 63.7500 | 0.0557 | 0.1316 | | +| Qwen/Qwen1.5-14B-Chat-AWQ | 98.3736 | 74.0728 | 91.5000 | 88.5000 | 0.0725 | 0.1188 | | +| Qwen/Qwen1.5-14B-Chat-GPTQ-Int4 | 77.7489 | 92.8893 | 88.5000 | 88.5000 | 0.0863 | 0.1030 | | +| Qwen/Qwen1.5-14B-Chat-GPTQ-Int8 | 90.9851 | 79.5202 | 88.5000 | 90.2500 | 0.0716 | 0.1136 | 
| +| Qwen/Qwen1.5-32B-Chat-GPTQ-Int4 | 58.2218 | 51.3081 | 92.2500 | 91.0000 | 0.0775 | 0.1392 | | +| Qwen/Qwen1.5-72B-Chat-GPTQ-Int4 | 47.0482 | 28.5930 | 92.7500 | 92.7500 | 0.1314 | 0.3305 | | +| Qwen/Qwen1.5-7B-Chat-GPTQ-Int4 | 111.7481 | 110.7277 | 88.0000 | 86.7500 | 0.0857 | 0.0978 | | +| Qwen/Qwen2-72B-Instruct-GPTQ-Int4 | 48.2245 | 29.6541 | 92.7500 | 92.7500 | 0.1091 | 0.2308 | | +| Qwen/Qwen2-7B-Instruct-GPTQ-Int4 | 129.1925 | 127.4951 | 88.5000 | 88.5000 | 0.0632 | 0.1595 | | +| Qwen/Qwen2-7B-Instruct-GPTQ-Int8 | 120.7039 | 120.5521 | 89.2500 | 91.0000 | 0.0589 | 0.0928 | | +| Qwen/Qwen2.5-0.5B | 190.6378 | 143.7020 | 12.5000 | 80.0000 | 0.0569 | 0.0966 | | +| Qwen/Qwen2.5-0.5B-Instruct | 157.0171 | 74.5845 | 66.2500 | 90.0000 | 0.0524 | 0.1228 | | +| Qwen/Qwen2.5-1.5B-Instruct | 132.3709 | 108.0231 | 85.0000 | 85.0000 | 0.0784 | 0.1907 | | +| Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4 | 99.1006 | 111.6715 | 80.0000 | 85.0000 | 0.0832 | 0.0998 | | +| Qwen/Qwen2.5-14B-Instruct-1M | 73.2996 | 50.5570 | 92.7500 | 91.0000 | 0.0736 | 0.1316 | | +| Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4 | 79.5701 | 72.2741 | 91.0000 | 91.0000 | 0.0738 | 0.1219 | | +| Qwen/Qwen2.5-14B-Instruct-GPTQ-Int8 | 81.3902 | 70.3944 | 91.0000 | 91.0000 | 0.0588 | 0.3164 | | +| Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4 | 60.0678 | 54.5765 | 90.5000 | 91.0000 | 0.1023 | 0.1855 | | +| Qwen/Qwen2.5-3B | 102.8492 | 124.4283 | 47.5000 | 50.0000 | 0.0830 | 0.1048 | | +| Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4 | 46.5678 | 29.5256 | 91.0000 | 70.5000 | 0.1013 | 0.2163 | | +| Qwen/Qwen2.5-7B-Instruct-1M | 97.2979 | 92.4331 | 89.2500 | 89.2500 | 0.0685 | 0.1017 | | +| Qwen/Qwen2.5-7B-Instruct-AWQ | 115.5071 | 113.5233 | 88.5000 | 91.0000 | 0.0599 | 0.0982 | | +| Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4 | 118.1169 | 133.1432 | 87.5000 | 87.5000 | 0.0603 | 0.1551 | | +| Qwen/Qwen2.5-Coder-14B-Instruct-AWQ | 47.8161 | 52.9300 | 87.5000 | 87.5000 | 0.0854 | 0.1162 | | +| Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int4 | 70.0316 | 
68.4585 | 87.5000 | 73.7500 | 0.0694 | 0.1246 | | +| Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int4 | 60.2992 | 55.2420 | 91.0000 | 91.0000 | 0.1108 | 0.2650 | | +| Qwen/Qwen2.5-Coder-7B | 135.1152 | 115.0369 | 46.7500 | 63.7500 | 0.0979 | 0.2146 | | +| Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int8 | 108.4975 | 100.5769 | 87.5000 | 87.5000 | 0.0646 | 0.1794 | | +| Qwen/Qwen3-0.6B | 115.7984 | 128.5600 | 56.2500 | 72.5000 | 0.0890 | 0.1073 | | +| Qwen/Qwen3-0.6B-GPTQ-Int8 | 117.4333 | 95.0022 | 75.0000 | 33.7500 | 0.0891 | 0.1146 | | +| Qwen/Qwen3-1.7B | 123.6276 | 55.1110 | 81.7500 | 86.7500 | 0.0931 | 0.2069 | | +| Qwen/Qwen3-30B-A3B-Base | 36.5579 | 145.1734 | 86.0000 | 65.0000 | 0.1890 | 0.0983 | | +| Qwen/Qwen3-30B-A3B-GPTQ-Int4 | 41.5809 | 95.4118 | 88.5000 | 66.2500 | 0.0818 | 0.1298 | | +| Qwen/Qwen3-32B-GPTQ-Int8 | 53.9982 | 41.3361 | 86.7500 | 86.7500 | 0.1501 | 0.2713 | | +| Qwen/Qwen3-4B | 167.1038 | 83.0732 | 87.5000 | 66.2500 | 0.0587 | 0.1159 | | +| Qwen/Qwen3-4B | 171.1984 | 105.4759 | 89.2500 | 84.2500 | 0.0598 | 0.1105 | | +| Qwen/Qwen3-4B-AWQ | 76.4882 | 91.4280 | 86.7500 | 88.5000 | 0.0922 | 0.1245 | | +| Qwen/Qwen3-4B-Instruct-2507 | 65.4432 | 71.7888 | 91.7500 | 91.7500 | 0.0966 | 0.1285 | | +| Qwen/Qwen3-4B-SafeRL | 78.8314 | 96.6840 | 87.5000 | 88.5000 | 0.0930 | 0.1174 | | +| Qwen/Qwen3-4B-Thinking-2507 | 73.3587 | 52.6143 | 81.7500 | 88.5000 | 0.0799 | 0.2663 | | +| Qwen/Qwen3-8B-AWQ | 94.2630 | 67.3837 | 88.5000 | 86.7500 | 0.0990 | 0.1425 | | +| QwenCollection/Hercules-Mini-1.8B | 202.5339 | 202.1224 | 28.7500 | 28.7500 | 0.0517 | 0.0855 | | +| QwenCollection/Ragas-critic-llm-Qwen1.5-GPTQ | 170.6923 | 136.0126 | 35.0000 | 25.0000 | 0.0667 | 0.1142 | | +| RUC-DataLab/DeepAnalyze-8B | 84.5026 | 84.9213 | 86.7500 | 83.5000 | 0.1118 | 0.1673 | | +| shakechen/Llama-2-7b-chat-hf | 260.9870 | 34.1061 | 61.2500 | 91.0000 | 0.0892 | 0.1960 | | +| Shanghai_AI_Laboratory/internlm-20b | 54.0570 | 40.9779 | 49.2500 | 71.2500 | 0.1376 | 0.2653 | | +| 
Shanghai_AI_Laboratory/internlm-chat-20b | 59.3568 | 44.0583 | 85.0000 | 85.0000 | 0.0789 | 0.1403 | | +| Shanghai_AI_Laboratory/internlm2-chat-1_8b | 137.9855 | 150.8975 | 22.5000 | 31.2500 | 0.0874 | 0.1612 | | +| tclf90/Codestral-22B-v0.1-hf-GPTQ-Int4 | 69.1961 | 71.7657 | 80.0000 | 70.0000 | 0.1810 | 0.2461 | | +| tclf90/deepseek-r1-distill-qwen-14b-gptq-int4 | 61.5092 | 59.0025 | 85.0000 | 85.0000 | 0.0786 | 0.1791 | | +| tclf90/deepseek-r1-distill-qwen-32b-gptq-int4 | 63.7344 | 54.3297 | 88.0000 | 86.7500 | 0.1675 | 0.2525 | | +| tclf90/deepseek-r1-distill-qwen-32b-gptq-int8 | 59.9314 | 45.7905 | 86.7500 | 86.7500 | 0.1491 | 0.2839 | | +| tclf90/deepseek-r1-distill-qwen-7b-gptq-int4 | 134.7569 | 141.3986 | 80.0000 | 85.0000 | 0.0892 | 0.2778 | | +| tclf90/glm-4-9b-chat-GPTQ-Int4 | 92.2061 | 97.6775 | 89.2500 | 91.7500 | 0.0735 | 0.4389 | | +| tclf90/glm-4-9b-chat-GPTQ-Int8 | 67.4918 | 93.5178 | 90.5000 | 86.7500 | 0.1037 | 0.1350 | | +| tclf90/Qwen2-14B-merge-GPTQ-Int8 | 68.0706 | 73.0019 | 88.5000 | 88.5000 | 0.0741 | 0.1217 | | +| tclf90/qwen2.5-14b-instruct-1m-gptq-int4 | 80.4909 | 71.7343 | 91.0000 | 91.0000 | 0.0720 | 0.1329 | | +| tclf90/qwen2.5-72b-instruct-gptq-int4 | 38.6166 | 34.9558 | 91.7500 | 52.5000 | 0.1053 | 0.5529 | | +| tclf90/Qwen3-32B-GPTQ-Int4 | 54.3526 | 38.4391 | 86.7500 | 88.5000 | 0.1470 | 0.2276 | | +| tclf90/Qwen3-32B-GPTQ-Int4 | 60.6483 | 42.8220 | 86.7500 | 86.7500 | 0.1651 | 0.2262 | | +| tclf90/Qwen3-32B-GPTQ-Int8 | 48.2039 | 148.4290 | 86.7500 | 70.0000 | 0.1526 | 0.0950 | | +| tclf90/qwq-32b-gptq-int4 | 60.3297 | 53.4619 | 87.5000 | 86.7500 | 0.1581 | 0.4379 | | +| tclf90/qwq-32b-gptq-int8 | 52.0826 | 41.4700 | 86.7500 | 87.5000 | 0.1853 | 0.2502 | | +| TheBloke/Kimiko-7B-fp16 | 52.3951 | 41.4855 | 15.0000 | 15.0000 | 0.0947 | 0.1223 | | +| tiiuae/falcon-7b-instruct | 177.4328 | 147.2327 | 20.0000 | 20.0000 | 0.0906 | 0.1180 | | +| TongyiFinance/Tongyi-Finance-14B-Chat | 161.2871 | 55.1869 | 52.5000 | 63.7500 | 0.0970 | 0.2396 
| | +| TongyiFinance/Tongyi-Finance-14B-Chat-Int4 | 144.2085 | 114.2191 | 57.5000 | 38.7500 | 0.0592 | 0.1210 | | +| TongyiFinance/Tongyi-Finance-14B-Chat-Int4 | 147.6310 | 129.5033 | 57.5000 | 57.5000 | 0.0694 | 0.1123 | | +| UnicomAI/Unichat-llama3.2-Chinese-1B | 170.9294 | 181.6792 | 26.2500 | 25.0000 | 0.0594 | 0.1311 | | +| unsloth/Phi-3.5-mini-instruct | 39.4028 | 38.0348 | 80.0000 | 77.5000 | 0.0716 | 0.1253 | | +| XGenerationLab/XiYanSQL-QwenCoder-3B-2502 | 111.9181 | 105.4054 | 80.0000 | 85.0000 | 0.0593 | 0.1060 | | +| Xunzillm4cc/Xunzi-Qwen1.5-4B | 99.2706 | 102.2685 | 22.5000 | 25.0000 | 0.0811 | 0.0982 | | +| Xunzillm4cc/Xunzi-Qwen2-1.5B | 125.9543 | 137.9787 | 20.0000 | 17.5000 | 0.0588 | 0.0903 | | +| ZhipuAI/chatglm3-6b-base | 129.0337 | 40.0233 | 38.7500 | 71.2500 | 0.1349 | 0.2365 | | +| ZhipuAI/glm-4-9b-chat-1m | 102.4445 | 77.4381 | 89.7500 | 88.7500 | 0.0911 | 0.1357 | | +| ZhipuAI/glm-4-9b-chat-hf | 109.1148 | 71.8195 | 85.0000 | 85.0000 | 0.0900 | 0.1838 | | +| ZhipuAI/GLM-Z1-32B-0414 | 55.9116 | 41.5245 | 88.5000 | 88.5000 | 0.1048 | 0.2995 | | +| ZhipuAI/GLM-Z1-9B-0414 | 90.9342 | 71.1734 | 85.0000 | 86.7500 | 0.0879 | 0.9642 | | +| ZhipuAI/LongWriter-glm4-9b | 100.4160 | 81.4277 | 86.7500 | 86.7500 | 0.1221 | 0.1758 | | +| zpeng1989/TCM_DeepSeek_LLM | 98.9219 | 64.9526 | 82.5000 | 80.0000 | 0.0721 | 0.1533 | |⏎ + -- 2.49.1 From b10794c492d4155a1ffe3a529458cad91b5b52c3 Mon Sep 17 00:00:00 2001 From: dog Date: Wed, 3 Dec 2025 17:28:07 +0800 Subject: [PATCH 2/2] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e772bbc..bfd963c 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ 模型测试结果 -| 模型名称 | A100出字速度(字/秒) | 沐曦卡出字速度(字/秒) | A100输出质量 | 输出质量 | A100首字延迟(秒) | 首字延迟(秒) | 备注 | +| 模型名称 | A100出字速度(字/秒) | 曦云C500出字速度(字/秒) | A100输出质量 | 曦云C500输出质量 | A100首字延迟(秒) | 曦云C500首字延迟(秒) | 备注 | | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | | 01ai/Yi-1.5-6B-Chat 
| 109.5427 | 109.5463 | 85.0000 | 80.0000 | 0.0921 | 0.1178 | | | 01ai/Yi-6B-Chat | 113.3089 | 83.3881 | 85.0000 | 85.0000 | 0.0663 | 0.0986 | | @@ -211,5 +211,5 @@ | ZhipuAI/GLM-Z1-32B-0414 | 55.9116 | 41.5245 | 88.5000 | 88.5000 | 0.1048 | 0.2995 | | | ZhipuAI/GLM-Z1-9B-0414 | 90.9342 | 71.1734 | 85.0000 | 86.7500 | 0.0879 | 0.9642 | | | ZhipuAI/LongWriter-glm4-9b | 100.4160 | 81.4277 | 86.7500 | 86.7500 | 0.1221 | 0.1758 | | -| zpeng1989/TCM_DeepSeek_LLM | 98.9219 | 64.9526 | 82.5000 | 80.0000 | 0.0721 | 0.1533 | |⏎ +| zpeng1989/TCM_DeepSeek_LLM | 98.9219 | 64.9526 | 82.5000 | 80.0000 | 0.0721 | 0.1533 | | -- 2.49.1