sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct

2025-09-13 17:00:20 +08:00
commit 118f1fc726
2037 changed files with 515371 additions and 0 deletions
--- a/benchmark/multi_document_qa/build_dataset.py
+++ b/benchmark/multi_document_qa/build_dataset.py
@@ -0,0 +1,70 @@
+import json
+
+import transformers
+
+# Load the source text. A single read() yields the same string that the previous
+# "\n".join(readlines()) followed by replace("\n\n", "\n") produced, and the
+# `with` block closes the file handle instead of leaking it.
+with open("llama2.txt", "r", encoding="utf-8", errors="ignore") as fin:
+    content = fin.read()
+
+# Count tokens
+name = "meta-llama/Llama-2-7b-chat-hf"
+t = transformers.AutoTokenizer.from_pretrained(name)
+print(f"num tokens: {len(t.encode(content))}")
+
+# Segment: split on blank lines, then pack consecutive parts into documents
+SEP = "\n\n"
+parts = content.split(SEP)
+print(f"num segments: {len(parts)}")
+
+# A document is closed as soon as its summed per-part token count exceeds this.
+segment_len = 1100
+
+segments = []
+tmp = []
+tmp_len = 0
+for part in parts:
+    tmp.append(part)
+    tmp_len += len(t.encode(part))
+
+    if tmp_len > segment_len:
+        segments.append(SEP.join(tmp))
+        tmp = []
+        tmp_len = 0
+# NOTE: parts still held in `tmp` here (a tail that never crossed segment_len)
+# are not emitted as a segment.
+
+for i, s in enumerate(segments):
+    print(i, len(t.encode(s)))
+
+# Dump: one JSON line holding the first 30 documents plus questions and answers
+record = {
+    "documents": segments[:30],
+    "questions": [
+        "What is the name of the fine-tuned LLMs?",
+        "Which figure shows the helpfulness human evaluation results for Llama 2-Chat?",
+        "What is the number of parameters in the largest Llama 2 model?",
+        "What is the batch size of fine-tuning?",
+        "Where can we find the details of potential data contamination?",
+        "What is the full name of MPT?",
+        "What is the power consumption of RSC in Watt?",
+        "How many tokens of data do they train on?",
+        "Which model's release is delayed due to a lack of time to sufficiently red team?",
+        "Which activation function is used in Llama?",
+    ],
+    "answers": [
+        "Llama 2 Chat",
+        "1",
+        "70 B",
+        "64",
+        "A 6",
+        "MosaicML",
+        "400",
+        "2 trillion",
+        "34 B",
+        "SwiGLU",
+    ],
+}
+with open("questions.jsonl", "w", encoding="utf-8") as fout:
+    fout.write(json.dumps(record) + "\n")