From f9633fa9b94c633677863bfd0dc183b8717cfd77 Mon Sep 17 00:00:00 2001 From: Byron Hsu Date: Sun, 10 Nov 2024 21:57:32 -0800 Subject: [PATCH] [rust] cache-aware DP - approx tree (#1934) --- .../multi_turn_chat/long_prompt_multi_turn.py | 103 ++ rust/Cargo.lock | 1021 ++++++++++++++--- rust/Cargo.toml | 3 +- rust/py_src/dp_demo.py | 156 +++ rust/py_src/main.py | 12 + rust/readme.md | 22 + rust/sglang | 1 + rust/src/lib.rs | 65 +- rust/src/main.rs | 72 +- rust/src/router.rs | 158 ++- rust/src/server.rs | 15 +- rust/src/tree.rs | 17 +- rust/tests/test_tree.rs | 4 +- 13 files changed, 1472 insertions(+), 177 deletions(-) create mode 100644 benchmark/multi_turn_chat/long_prompt_multi_turn.py create mode 100644 rust/py_src/dp_demo.py create mode 100644 rust/py_src/main.py create mode 160000 rust/sglang diff --git a/benchmark/multi_turn_chat/long_prompt_multi_turn.py b/benchmark/multi_turn_chat/long_prompt_multi_turn.py new file mode 100644 index 000000000..c6fa67438 --- /dev/null +++ b/benchmark/multi_turn_chat/long_prompt_multi_turn.py @@ -0,0 +1,103 @@ +import itertools +import json +import random +import string +import threading +import time +from argparse import ArgumentParser + +import sglang as sgl +from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.test.test_utils import ( + add_common_sglang_args_and_parse, + select_sglang_backend, +) +from sglang.utils import dump_state_text + +random.seed(42) + + +def gen_prompt(tokenizer, token_num): + all_available_tokens = list(tokenizer.get_vocab().values()) + selected_tokens = random.choices(all_available_tokens, k=token_num) + ret = tokenizer.decode(selected_tokens) + return ret + + +def gen_arguments(args, tokenizer): + multi_qas = [ + {"system_prompt": gen_prompt(tokenizer, args.system_prompt_len), "qas": []} + for _ in range(args.num_qa) + ] + for i in range(args.num_qa): + qas = multi_qas[i]["qas"] + for j in range(args.turns): + qas.append( + { + "prompt": gen_prompt(tokenizer, args.len_q), 
"new_tokens": args.len_a, + } + ) + return multi_qas + + +@sgl.function +def multi_turns(s, system_prompt, qas): + s += system_prompt + + for qa in qas: + s += qa["prompt"] + s += sgl.gen(max_tokens=qa["new_tokens"], ignore_eos=True) + + +def main(args): + tokenizer = get_tokenizer(args.tokenizer, trust_remote_code=args.trust_remote_code) + + multi_qas = gen_arguments(args, tokenizer) + + backend = select_sglang_backend(args) + + tic = time.time() + states = multi_turns.run_batch( + multi_qas, + temperature=0, + backend=backend, + num_threads=args.parallel, + progress_bar=True, + ) + latency = time.time() - tic + + print(f"Latency: {latency:.3f}") + + dump_state_text(f"tmp_output_{args.backend}.txt", states) + + with open(args.result_file, "a") as fout: + value = { + "task": "multi_turn_system_prompt_chat", + "backend": args.backend, + "num_gpus": 1, + "latency": round(latency, 3), + "num_requests": args.num_qa, + "num_turns": args.turns, + "other": { + "parallel": args.parallel, + }, + } + fout.write(json.dumps(value) + "\n") + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--turns", type=int, default=8) + parser.add_argument("--num-qa", type=int, default=128) + parser.add_argument("--system-prompt-len", type=int, default=2048) + parser.add_argument("--len-q", type=int, default=32) + parser.add_argument("--len-a", type=int, default=128) + parser.add_argument( + "--tokenizer", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct" + ) + parser.add_argument("--trust-remote-code", action="store_true") + args = add_common_sglang_args_and_parse(parser) + + print(args) + main(args) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 44a5a198b..a7f0dce33 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -8,7 +8,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f7b0a21988c1bf877cf4759ef5ddaac04c1c9fe808c9142ecb78ba97d97a28a" dependencies = [ - "bitflags", + "bitflags 2.6.0", 
"bytes", "futures-core", "futures-sink", @@ -30,8 +30,8 @@ dependencies = [ "actix-service", "actix-utils", "ahash", - "base64", - "bitflags", + "base64 0.22.1", + "bitflags 2.6.0", "brotli", "bytes", "bytestring", @@ -43,7 +43,7 @@ dependencies = [ "http 0.2.12", "httparse", "httpdate", - "itoa 1.0.11", + "itoa", "language-tags", "local-channel", "mime", @@ -156,7 +156,7 @@ dependencies = [ "futures-core", "futures-util", "impl-more", - "itoa 1.0.11", + "itoa", "language-tags", "log", "mime", @@ -239,9 +239,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.15" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", @@ -254,36 +254,36 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.4" +version = "3.0.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -310,15 +310,27 @@ dependencies = [ "miniz_oxide", "object", "rustc-demangle", - "windows-targets", + "windows-targets 0.52.6", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.6.0" @@ -384,9 +396,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.31" +version = "1.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" +checksum = "baee610e9452a8f6f0a1b6194ec09ff9e2d85dea54432acdae41aa0761c95d70" dependencies = [ "jobserver", "libc", @@ -441,9 +453,22 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "colorchoice" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width", + "windows-sys 0.52.0", +] [[package]] name = "convert_case" @@ -496,6 +521,31 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + [[package]] name = "crypto-common" version = "0.1.6" @@ -506,6 +556,41 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "deranged" version = "0.3.11" @@ -515,6 +600,37 @@ dependencies = [ 
"powerfmt", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + [[package]] name = "derive_more" version = "0.99.18" @@ -539,16 +655,54 @@ dependencies = [ ] [[package]] -name = "dtoa" -version = "0.4.8" +name = "dirs" +version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.48.0", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + 
+[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "encoding_rs" -version = "0.8.34" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] @@ -569,6 +723,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "esaxx-rs" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +dependencies = [ + "cc", +] + [[package]] name = "fastrand" version = "2.1.1" @@ -743,9 +906,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" [[package]] name = "heck" @@ -759,6 +922,23 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "hf-hub" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b780635574b3d92f036890d8373433d6f9fc7abb320ee42a5c25897fc8ed732" +dependencies = [ + "dirs", + "indicatif", + "log", + "native-tls", + "rand", + "serde", + "serde_json", + "thiserror", + "ureq", +] + [[package]] name = "http" version = "0.2.12" @@ -767,7 +947,7 @@ checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" dependencies = [ "bytes", "fnv", - "itoa 1.0.11", + "itoa", ] [[package]] @@ -778,7 +958,7 @@ checksum = 
"21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" dependencies = [ "bytes", "fnv", - "itoa 1.0.11", + "itoa", ] [[package]] @@ -829,7 +1009,7 @@ dependencies = [ "http 1.1.0", "http-body", "httparse", - "itoa 1.0.11", + "itoa", "pin-project-lite", "smallvec", "tokio", @@ -871,9 +1051,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", @@ -889,13 +1069,148 @@ dependencies = [ ] [[package]] -name = "idna" -version = "0.5.0" +name = "icu_collections" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = 
"icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] 
+name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] @@ -914,12 +1229,34 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indicatif" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" +dependencies = [ + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-width", +] + [[package]] name = "indoc" version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", +] + [[package]] name = "ipnet" version = "2.10.1" @@ -933,10 +1270,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] -name = "itoa" -version = "0.3.4" +name = "itertools" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8324a32baf01e2ae060e9de58ed0bc2320c9a2833491ee36cd3b4c414de4db8c" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] [[package]] name = "itoa" @@ -968,18 +1317,40 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4345964bb142484797b161f473a503a434de77149dd8c7427788c6e13379388" +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags 2.6.0", + "libc", +] + [[package]] name = "linux-raw-sys" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "local-channel" version = "0.1.5" @@ -1013,6 +1384,22 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "macro_rules_attribute" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a82271f7bc033d84bbca59a3ce3e4159938cb08a9c3aebbe54d215131518a13" +dependencies = [ + "macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568" + [[package]] name = "memchr" version = "2.7.4" @@ -1034,6 +1421,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.0" @@ -1056,6 +1449,27 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "monostate" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d208407d7552cd041d8cdb69a1bc3303e029c598738177a3d87082004dc0e1e" +dependencies = [ + "monostate-impl", + "serde", +] + +[[package]] +name = "monostate-impl" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "native-tls" version = "0.2.12" @@ -1073,6 +1487,16 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -1080,22 +1504,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" [[package]] -name = "num-traits" -version = "0.1.43" +name = "number_prefix" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31" -dependencies = [ - "num-traits 0.2.19", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" @@ -1112,13 +1524,35 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "onig" +version = "6.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" +dependencies = [ + "bitflags 1.3.2", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "openssl" version = "0.10.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" dependencies = [ - "bitflags", + "bitflags 2.6.0", "cfg-if", "foreign-types", "libc", @@ -1156,6 +1590,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "parking_lot" version = "0.12.3" @@ -1176,7 +1616,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -1193,9 +1633,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = 
"pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -1241,9 +1681,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d922163ba1f79c04bc49073ba7b32fd5a8d3b76a87c955921234b8e77333c51" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" dependencies = [ "cfg-if", "indoc", @@ -1259,9 +1699,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc38c5feeb496c8321091edf3d63e9a6829eab4b863b4a6a65f26f3e9cc6b179" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" dependencies = [ "once_cell", "target-lexicon", @@ -1269,9 +1709,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94845622d88ae274d2729fcefc850e63d7a3ddff5e3ce11bd88486db9f1d357d" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" dependencies = [ "libc", "pyo3-build-config", @@ -1279,9 +1719,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.5" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e655aad15e09b94ffdb3ce3d217acf652e26bbc37697ef012f5e5e348c716e5e" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -1291,9 +1731,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.5" +version = "0.22.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1e3f09eecd94618f60a455a23def79f79eba4dc561a97324bf9ac8c6df30ce" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" dependencies = [ "heck", "proc-macro2", @@ -1341,20 +1781,62 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-cond" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9" +dependencies = [ + "either", + "itertools 0.11.0", + "rayon", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "bitflags", + "bitflags 2.6.0", +] + +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom", + "libredox", + "thiserror", ] [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -1387,11 +1869,11 @@ checksum = 
"2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "encoding_rs", "futures-core", @@ -1462,11 +1944,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee" dependencies = [ - "bitflags", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", @@ -1475,11 +1957,13 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.15" +version = "0.23.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" +checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" dependencies = [ + "log", "once_cell", + "ring", "rustls-pki-types", "rustls-webpki", "subtle", @@ -1539,7 +2023,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags", + "bitflags 2.6.0", "core-foundation", "core-foundation-sys", "libc", @@ -1564,18 +2048,18 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" dependencies = [ 
"serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote", @@ -1584,13 +2068,13 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.1" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c62115693d0a9ed8c32d1c760f0fdbe7d4b05cb13c135b9b54137ac0d59fccb" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ - "dtoa", - "itoa 0.3.4", - "num-traits 0.1.43", + "itoa", + "memchr", + "ryu", "serde", ] @@ -1601,7 +2085,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ "form_urlencoded", - "itoa 1.0.11", + "itoa", "ryu", "serde", ] @@ -1619,6 +2103,7 @@ dependencies = [ "reqwest", "serde", "serde_json", + "tokenizers", ] [[package]] @@ -1678,6 +2163,24 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "spm_precompiled" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" +dependencies = [ + "base64 0.13.1", + "nom", + "serde", + "unicode-segmentation", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.11.1" @@ -1692,9 +2195,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = 
"syn" -version = "2.0.85" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -1710,13 +2213,24 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "system-configuration" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags", + "bitflags 2.6.0", "core-foundation", "system-configuration-sys", ] @@ -1750,6 +2264,26 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "thiserror" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02dd99dc800bbb97186339685293e1cc5d9df1f8fae2d0aecd9ff1c77efea892" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7c61ec9a6f64d2793d8a45faba21efbe3ced62a886d44c36a009b2b519b4c7e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "time" version = "0.3.36" @@ -1757,7 +2291,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", - "itoa 1.0.11", + "itoa", "num-conv", "powerfmt", "serde", @@ -1782,19 +2316,47 @@ dependencies = [ ] [[package]] -name = "tinyvec" -version = "1.8.0" +name = "tinystr" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" dependencies = [ - "tinyvec_macros", + "displaydoc", + "zerovec", ] [[package]] -name = "tinyvec_macros" -version = "0.1.1" +name = "tokenizers" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +checksum = "67b67c92f6d705e2a1d106fb0b28c696f9074901a9c656ee5d9f5de204c39bf7" +dependencies = [ + "aho-corasick", + "derive_builder", + "esaxx-rs", + "getrandom", + "hf-hub", + "indicatif", + "itertools 0.12.1", + "lazy_static", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand", + "rayon", + "rayon-cond", + "regex", + "regex-syntax", + "serde", + "serde_json", + "spm_precompiled", + "thiserror", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] [[package]] name = "tokio" @@ -1885,12 +2447,6 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "unicode-bidi" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" - [[package]] name = "unicode-ident" version = "1.0.13" @@ -1898,14 +2454,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] -name = "unicode-normalization" -version = "0.1.24" +name = "unicode-normalization-alignments" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" dependencies = [ - 
"tinyvec", + "smallvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "unindent" version = "0.2.3" @@ -1919,16 +2493,47 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] -name = "url" -version = "2.5.2" +name = "ureq" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "native-tls", + "once_cell", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "url", + "webpki-roots", +] + +[[package]] +name = "url" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" dependencies = [ "form_urlencoded", "idna", "percent-encoding", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -2031,9 +2636,9 @@ checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -2052,6 +2657,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.26.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841c67bff177718f1d4dfefde8d8f0e78f9b6589319ba88312f567fc5841a958" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "windows-registry" version = "0.2.0" @@ -2060,7 +2674,7 @@ checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" dependencies = [ "windows-result", "windows-strings", - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -2069,7 +2683,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -2079,7 +2693,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" dependencies = [ "windows-result", - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", ] [[package]] @@ -2088,7 +2711,7 @@ version = "0.52.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", ] [[package]] @@ -2097,7 +2720,22 @@ version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -2106,28 +2744,46 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] 
+name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -2140,30 +2796,90 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -2185,12 +2901,55 @@ dependencies = [ "syn", ] +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zstd" version = "0.13.2" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index a23ac1e46..d02eba639 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -21,5 +21,6 @@ bytes = "1.8.0" rand = "0.8.5" reqwest = { version = "0.12.8", features = ["stream"] } futures-util = "0.3" -serde_json = "=1.0.1" +serde_json = "1.0" pyo3 = { version = "0.22.5", features = ["extension-module"] } +tokenizers = { version = "0.20.3", features = ["http"] } diff --git a/rust/py_src/dp_demo.py b/rust/py_src/dp_demo.py new file mode 100644 index 000000000..8b601e95a --- /dev/null +++ b/rust/py_src/dp_demo.py @@ -0,0 +1,156 @@ +import argparse +import os +import signal +import subprocess +import sys +import time +from typing import Dict, List + +import requests +from sglang_router import PolicyType, Router + +# Global processes list for cleanup +_processes: List[subprocess.Popen] = [] + + +def cleanup_processes(signum=None, frame=None): + """Cleanup function to kill all worker processes.""" + print("\nCleaning up processes...") + for process in _processes: + try: + # Kill the entire process group + pgid = 
os.getpgid(process.pid) + os.killpg(pgid, signal.SIGKILL) + process.wait() + except: + pass + sys.exit(1) + + +# Register signal handlers +signal.signal(signal.SIGINT, cleanup_processes) +signal.signal(signal.SIGTERM, cleanup_processes) + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description="Launch SGLang Router Server") + parser.add_argument( + "--host", type=str, default="localhost", help="Host address to bind the server" + ) + parser.add_argument( + "--port", type=int, default=30000, help="Base port number for workers" + ) + parser.add_argument( + "--dp", + type=int, + default=2, + help="Number of worker processes (degree of parallelism)", + ) + parser.add_argument( + "--model-path", type=str, required=True, help="Path to the model" + ) + parser.add_argument( + "--local-tokenizer-path", + type=str, + required=True, + help="Path to the local tokenizer", + ) + return parser.parse_args() + + +def launch_workers(args) -> tuple[List[subprocess.Popen], List[str]]: + """Launch all worker processes concurrently using subprocess.""" + processes = [] + worker_urls = [] + + # Launch each worker process + for i in range(args.dp): + port = args.port + i + url = f"http://{args.host}:{port}" + worker_urls.append(url) + # TODO: replace this with launch_server, and move this file to sglang/ because it depends on sglang + # We don't + command = f"export CUDA_VISIBLE_DEVICES={i}; python -m sglang.launch_server --model-path {args.model_path} --host {args.host} --port {port}" + print(command) + process = subprocess.Popen(command, shell=True) + processes.append(process) + _processes.append(process) # Add to global list for cleanup + + return processes, worker_urls + + +def wait_for_healthy_workers(worker_urls: List[str], timeout: int = 300) -> bool: + """Block until all workers are healthy or timeout is reached.""" + start_time = time.time() + healthy_workers: Dict[str, bool] = {url: False for url in worker_urls} + + while 
time.time() - start_time < timeout: + print("checking healthiness...") + all_healthy = True + + for url in worker_urls: + if not healthy_workers[url]: # Only check workers that aren't healthy yet + try: + response = requests.get(f"{url}/health") + if response.status_code == 200: + print(f"Worker at {url} is healthy") + healthy_workers[url] = True + else: + all_healthy = False + except requests.RequestException: + all_healthy = False + + if all_healthy: + print("All workers are healthy!") + return True + + time.sleep(5) + + # If we get here, we've timed out + unhealthy_workers = [url for url, healthy in healthy_workers.items() if not healthy] + print(f"Timeout waiting for workers: {unhealthy_workers}") + return False + + +def main(): + """Main function to launch the router and workers.""" + args = parse_args() + processes = None + + try: + # Launch all workers concurrently + processes, worker_urls = launch_workers(args) + + # Block until all workers are healthy + if not wait_for_healthy_workers(worker_urls): + raise RuntimeError("Failed to start all workers") + + # Initialize and start the router + router = Router( + worker_urls=worker_urls, + policy=PolicyType.ApproxTree, + tokenizer_path=args.local_tokenizer_path, + ) + + print("Starting router...") + router.start() + + # Keep the main process running + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + print("\nShutting down...") + + except Exception as e: + print(f"Error: {e}") + finally: + # Cleanup: Kill all worker processes + if processes: + for process in processes: + process.kill() + + +if __name__ == "__main__": + main() diff --git a/rust/py_src/main.py b/rust/py_src/main.py new file mode 100644 index 000000000..6de3f8c88 --- /dev/null +++ b/rust/py_src/main.py @@ -0,0 +1,12 @@ +from sglang_router import PolicyType, Router + +router = Router( + worker_urls=[ + "http://localhost:30000", + "http://localhost:30001", + ], + policy=PolicyType.ApproxTree, + 
tokenizer_path="/shared/public/elr-models/meta-llama/Meta-Llama-3.1-8B-Instruct/07eb05b21d191a58c577b4a45982fe0c049d0693/tokenizer.json", +) + +router.start() diff --git a/rust/readme.md b/rust/readme.md index 716e50a97..7ef672e0e 100644 --- a/rust/readme.md +++ b/rust/readme.md @@ -2,6 +2,11 @@ SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances. +## Architecture + +1. `src/`: Rust implementation of the router +2. `py_src/`: lightweight Python interface on top of the Rust Python binding. This will be published as the `sglang-router` PyPI package + ## Installation WIP. Ideally just @@ -83,6 +88,23 @@ $ maturin develop 🛠 Installed sglang_router-0.0.0 ``` +4. Alternatively, if you don't want to create a venv, you can also build the binding as a wheel and install it + +```bash +$ maturin build --interpreter python +... + Compiling pyo3 v0.22.6 + Compiling pyo3-macros v0.22.6 + Compiling sglang_router v0.0.0 (/home/jobuser/sglang/rust) + Finished `dev` profile [unoptimized + debuginfo] target(s) in 9.67s +🖨 Copied external shared libraries to package sglang_router.libs directory: + /usr/lib/libssl.so.1.1.1k + /usr/lib/libcrypto.so.1.1.1k +📦 Built wheel for CPython 3.10 to + +$ pip install +``` + ## Usage 1.
Launch worker instances diff --git a/rust/sglang b/rust/sglang new file mode 160000 index 000000000..760552e06 --- /dev/null +++ b/rust/sglang @@ -0,0 +1 @@ +Subproject commit 760552e068edb58d9cd6e68aa1b714c247027d92 diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 26e43bb8a..cc99cb15a 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,37 +1,86 @@ +// Python Binding use pyo3::prelude::*; pub mod router; mod server; pub mod tree; -// Python binding +#[pyclass(eq)] +#[derive(Clone, PartialEq)] +pub enum PolicyType { + Random, + RoundRobin, + ApproxTree, +} + #[pyclass] struct Router { host: String, port: u16, worker_urls: Vec, - policy: String, + policy: PolicyType, + tokenizer_path: Option, + cache_threshold: Option, } #[pymethods] impl Router { #[new] - fn new(host: String, port: u16, worker_urls: Vec, policy: String) -> Self { - Router { + #[pyo3(signature = ( + worker_urls, + policy = PolicyType::RoundRobin, + host = String::from("127.0.0.1"), + port = 3001, + tokenizer_path = None, + cache_threshold = Some(0.50) + ))] + fn new( + worker_urls: Vec, + policy: PolicyType, + host: String, + port: u16, + tokenizer_path: Option, + cache_threshold: Option, + ) -> PyResult { + // Validate required parameters for approx_tree policy + if matches!(policy, PolicyType::ApproxTree) { + if tokenizer_path.is_none() { + return Err(PyErr::new::( + "tokenizer_path is required for approx_tree policy", + )); + } + } + + Ok(Router { host, port, worker_urls, policy, - } + tokenizer_path, + cache_threshold, + }) } fn start(&self) -> PyResult<()> { let host = self.host.clone(); let port = self.port; let worker_urls = self.worker_urls.clone(); - let policy = self.policy.clone(); + + let policy_config = match &self.policy { + PolicyType::Random => router::PolicyConfig::RandomConfig, + PolicyType::RoundRobin => router::PolicyConfig::RoundRobinConfig, + PolicyType::ApproxTree => router::PolicyConfig::ApproxTreeConfig { + tokenizer_path: self + .tokenizer_path + .clone() + 
.expect("tokenizer_path is required for approx_tree policy"), + cache_threshold: self + .cache_threshold + .expect("cache_threshold is required for approx_tree policy"), + }, + }; actix_web::rt::System::new().block_on(async move { - server::startup(host, port, worker_urls, policy) + server::startup(host, port, worker_urls, policy_config) .await .unwrap(); }); @@ -40,9 +89,9 @@ impl Router { } } -// python usage: `from sglang_router import Router` #[pymodule] fn sglang_router(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; m.add_class::()?; Ok(()) } diff --git a/rust/src/main.rs b/rust/src/main.rs index 851a85ae5..f7c8943eb 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -1,29 +1,87 @@ // src/main.rs -use clap::builder::PossibleValuesParser; use clap::Parser; +use clap::ValueEnum; // declare child modules mod router; mod server; mod tree; +use crate::router::PolicyConfig; + +#[derive(Debug, Clone, ValueEnum)] +pub enum PolicyType { + Random, + RoundRobin, + ApproxTree, +} + #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] struct Args { - #[arg(long, default_value = "127.0.0.1")] + #[arg( + long, + default_value = "127.0.0.1", + help = "Host address to bind the server to" + )] host: String, - #[arg(long, default_value_t = 3001)] + #[arg(long, default_value_t = 3001, help = "Port number to listen on")] port: u16, - #[arg(long, value_delimiter = ',')] + #[arg( + long, + value_delimiter = ',', + help = "Comma-separated list of worker URLs to distribute requests to" + )] worker_urls: Vec, - #[arg(long, default_value = "round_robin", value_parser = PossibleValuesParser::new(&["round_robin", "random"]))] - policy: String, + #[arg( + long, + default_value_t = PolicyType::RoundRobin, + value_enum, + help = "Load balancing policy to use: random, round_robin, or approx_tree" + )] + policy: PolicyType, + + #[arg( + long, + requires = "policy", + required_if_eq("policy", "approx_tree"), + help = "Path to the tokenizer file, 
required when using approx_tree policy" + )] + tokenizer_path: Option, + + #[arg( + long, + default_value = "0.50", + requires = "policy", + required_if_eq("policy", "approx_tree"), + help = "Cache threshold (0.0-1.0) for approx_tree routing. Routes to cached worker if match rate exceeds threshold, otherwise routes to shortest queue worker" + )] + cache_threshold: Option, +} + +impl Args { + fn get_policy_config(&self) -> PolicyConfig { + match self.policy { + PolicyType::Random => PolicyConfig::RandomConfig, + PolicyType::RoundRobin => PolicyConfig::RoundRobinConfig, + PolicyType::ApproxTree => PolicyConfig::ApproxTreeConfig { + tokenizer_path: self + .tokenizer_path + .clone() + .expect("tokenizer_path is required for approx_tree policy"), + cache_threshold: self + .cache_threshold + .expect("cache_threshold is required for approx_tree policy"), + }, + } + } } #[actix_web::main] async fn main() -> std::io::Result<()> { let args = Args::parse(); - server::startup(args.host, args.port, args.worker_urls, args.policy).await + let policy_config = args.get_policy_config(); + server::startup(args.host, args.port, args.worker_urls, policy_config).await } diff --git a/rust/src/router.rs b/rust/src/router.rs index 29db6e37c..65ab8214e 100644 --- a/rust/src/router.rs +++ b/rust/src/router.rs @@ -1,38 +1,90 @@ +use crate::tree::RadixTree; use actix_web::http::header::{HeaderValue, CONTENT_TYPE}; use actix_web::{HttpRequest, HttpResponse}; use bytes::Bytes; use futures_util::TryStreamExt; +use std::collections::HashMap; use std::fmt::Debug; +use std::sync::atomic::AtomicUsize; +use std::sync::{Arc, Mutex}; +use tokenizers::tokenizer::Tokenizer; #[derive(Debug)] pub enum Router { RoundRobin { worker_urls: Vec, - current_index: std::sync::atomic::AtomicUsize, + current_index: AtomicUsize, }, Random { worker_urls: Vec, }, + ApproxTree { + worker_urls: Vec, + // TODO: don't lock the whole tree + url_to_tree: Arc>>, + tokenizer: Tokenizer, + url_to_count: Arc>>, + cache_threshold: 
f32, + }, +} + +pub enum PolicyConfig { + RandomConfig, + RoundRobinConfig, + ApproxTreeConfig { + tokenizer_path: String, + cache_threshold: f32, + }, +} + +fn get_token_ids_from_request(body: &Bytes, tokenizer: &Tokenizer) -> Vec { + // 1. convert body to json + let json = serde_json::from_slice::(body).unwrap(); + // 2. get the text field + let text = json.get("text").and_then(|t| t.as_str()).unwrap_or(""); + // 3. tokenize the text field + let tokens = tokenizer.encode(text, false).unwrap(); + + tokens.get_ids().to_vec() } impl Router { - pub fn new(worker_urls: Vec, policy: String) -> Self { - match policy.to_lowercase().as_str() { - "random" => Router::Random { worker_urls }, - "round_robin" => Router::RoundRobin { + pub fn new(worker_urls: Vec, policy_config: PolicyConfig) -> Self { + match policy_config { + PolicyConfig::RandomConfig => Router::Random { worker_urls }, + PolicyConfig::RoundRobinConfig => Router::RoundRobin { worker_urls, current_index: std::sync::atomic::AtomicUsize::new(0), }, - _ => panic!( - "Unknown routing policy: {}. The available policies are 'random' and 'round_robin'", - policy - ), + PolicyConfig::ApproxTreeConfig { + tokenizer_path, + cache_threshold, + } => { + let mut url_to_tree = HashMap::new(); + let mut url_to_count = HashMap::new(); + + for url in &worker_urls { + url_to_tree.insert(url.clone(), RadixTree::new()); + url_to_count.insert(url.clone(), 0); + } + + Router::ApproxTree { + worker_urls, + url_to_tree: Arc::new(Mutex::new(url_to_tree)), + // TODO: rust ::from_pretrained cannot load from local file, so use ::from_file to load local file + tokenizer: Tokenizer::from_file(tokenizer_path).unwrap(), + url_to_count: Arc::new(Mutex::new(url_to_count)), + cache_threshold, + } + } } } pub fn get_first(&self) -> Option { match self { - Router::RoundRobin { worker_urls, .. } | Router::Random { worker_urls } => { + Router::RoundRobin { worker_urls, .. } + | Router::Random { worker_urls } + | Router::ApproxTree { worker_urls, .. 
} => { if worker_urls.is_empty() { None } else { @@ -48,26 +100,96 @@ impl Router { req: HttpRequest, body: Bytes, ) -> HttpResponse { + let mut input_ids: Vec = Vec::new(); + if let Router::ApproxTree { tokenizer, .. } = self { + input_ids = get_token_ids_from_request(&body, tokenizer); + } + let worker_url = match self { Router::RoundRobin { worker_urls, current_index, } => { - current_index + let idx = current_index .fetch_update( std::sync::atomic::Ordering::SeqCst, std::sync::atomic::Ordering::SeqCst, |x| Some((x + 1) % worker_urls.len()), ) - .expect_err("Error updating index in round robin"); + .unwrap(); - &worker_urls[current_index.load(std::sync::atomic::Ordering::SeqCst)] + worker_urls[idx].clone() } + Router::Random { worker_urls } => { - &worker_urls[rand::random::() % worker_urls.len()] + worker_urls[rand::random::() % worker_urls.len()].clone() + } + + Router::ApproxTree { + worker_urls, + url_to_tree, + url_to_count, + cache_threshold, + .. + } => { + // TODO: pipeline the locks. Release one earlier. + + let mut max_matched_rate = 0.0; + let mut max_matched_idx = 0; + + let locked_url_to_tree = url_to_tree.lock().unwrap(); + + // 1. Find the highest matched worker + for (i, url) in worker_urls.iter().enumerate() { + let tree = locked_url_to_tree.get(url).unwrap(); + let matched = tree.prefix_match(&input_ids[..]).len(); + let matched_rate = matched as f32 / input_ids.len() as f32; + + if matched_rate > max_matched_rate { + max_matched_rate = matched_rate; + max_matched_idx = i; + } + } + + // 2. If the rate is higher than the threshold, select the worker. 
If not, select the worker with the shortest queue + if max_matched_rate > *cache_threshold { + worker_urls[max_matched_idx].clone() + } else { + // pick the shortest queue from url_to_count + let locked_url_to_count = url_to_count.lock().unwrap(); + + let mut min_count = std::usize::MAX; + let mut min_count_id = 0; + + for (i, url) in worker_urls.iter().enumerate() { + let count = locked_url_to_count.get(url).unwrap(); + if *count < min_count { + min_count = *count; + min_count_id = i; + } + } + + worker_urls[min_count_id].clone() + } } }; + if let Router::ApproxTree { + url_to_tree, + url_to_count, + .. + } = self + { + // Insert input_ids to the tree + let mut locked_url_to_tree = url_to_tree.lock().unwrap(); + let selected_tree = locked_url_to_tree.get_mut(&worker_url).unwrap(); + selected_tree.insert(&input_ids[..]); + + let mut locked_url_to_count = url_to_count.lock().unwrap(); + let count = locked_url_to_count.get_mut(&worker_url).unwrap(); + *count += 1; + } + // Check if client requested streaming let is_stream = serde_json::from_slice::(&body) .map(|v| v.get("stream").and_then(|s| s.as_bool()).unwrap_or(false)) @@ -94,11 +216,19 @@ impl Router { .unwrap_or(actix_web::http::StatusCode::INTERNAL_SERVER_ERROR); if !is_stream { + // TODO: do the correction on the tree based on the cached input_ids + if let Router::ApproxTree { url_to_count, .. } = self { + let mut locked_url_to_count = url_to_count.lock().unwrap(); + let count = locked_url_to_count.get_mut(&worker_url).unwrap(); + *count -= 1; + } + match res.bytes().await { Ok(body) => HttpResponse::build(status).body(body.to_vec()), Err(_) => HttpResponse::InternalServerError().finish(), } } else { + // TODO: do the correction on the tree based on the cached input_ids. 
The streaming might be trickier to handle HttpResponse::build(status) .insert_header((CONTENT_TYPE, HeaderValue::from_static("text/event-stream"))) .streaming(res.bytes_stream().map_err(|_| { diff --git a/rust/src/server.rs b/rust/src/server.rs index fec7fae74..05a2f150c 100644 --- a/rust/src/server.rs +++ b/rust/src/server.rs @@ -1,3 +1,4 @@ +use crate::router::PolicyConfig; use crate::router::Router; use actix_web::{get, post, web, App, HttpRequest, HttpResponse, HttpServer, Responder}; use bytes::Bytes; @@ -9,9 +10,13 @@ pub struct AppState { } impl AppState { - pub fn new(worker_urls: Vec, policy: String, client: reqwest::Client) -> Self { + pub fn new( + worker_urls: Vec, + client: reqwest::Client, + policy_config: PolicyConfig, + ) -> Self { // Create router based on policy - let router = Router::new(worker_urls, policy); + let router = Router::new(worker_urls, policy_config); Self { router, client } } @@ -40,7 +45,6 @@ async fn forward_request( #[get("/v1/models")] async fn v1_model(data: web::Data) -> impl Responder { - // TODO: extract forward_to_route let worker_url = match data.router.get_first() { Some(url) => url, None => return HttpResponse::InternalServerError().finish(), @@ -59,7 +63,6 @@ async fn get_model_info(data: web::Data) -> impl Responder { forward_request(&data.client, worker_url, "/get_model_info".to_string()).await } -// no deser and ser, just forward and return #[post("/generate")] async fn generate(req: HttpRequest, body: Bytes, data: web::Data) -> impl Responder { data.router.dispatch(&data.client, req, body).await @@ -69,7 +72,7 @@ pub async fn startup( host: String, port: u16, worker_urls: Vec, - routing_policy: String, + policy_config: PolicyConfig, ) -> std::io::Result<()> { println!("Starting server on {}:{}", host, port); println!("Worker URLs: {:?}", worker_urls); @@ -80,7 +83,7 @@ pub async fn startup( .expect("Failed to create HTTP client"); // Store both worker_urls and client in AppState - let app_state =
web::Data::new(AppState::new(worker_urls, routing_policy, client)); + let app_state = web::Data::new(AppState::new(worker_urls, client, policy_config)); HttpServer::new(move || { App::new() diff --git a/rust/src/tree.rs b/rust/src/tree.rs index 27f1db8c2..2bcb84bef 100644 --- a/rust/src/tree.rs +++ b/rust/src/tree.rs @@ -1,18 +1,19 @@ use std::collections::HashMap; use std::mem; -#[derive(Clone)] +#[derive(Debug)] pub struct Node { - pub children: HashMap, // the key is first id of the child because each child must have unique first id - pub ids: Vec, - pub count: usize, + pub children: HashMap, // the key is first id of the child because each child must have unique first id + pub ids: Vec, + pub count: u32, } +#[derive(Debug)] pub struct RadixTree { pub root: Node, } -fn common_prefix_len(a: &[usize], b: &[usize]) -> usize { +fn common_prefix_len(a: &[u32], b: &[u32]) -> usize { let mut i = 0; while i < a.len() && i < b.len() && a[i] == b[i] { i += 1; @@ -37,7 +38,7 @@ impl RadixTree { } } - pub fn insert(&mut self, input_ids: &[usize]) { + pub fn insert(&mut self, input_ids: &[u32]) { let mut curr = &mut self.root; curr.count += 1; @@ -93,7 +94,7 @@ impl RadixTree { } } - pub fn prefix_match<'a>(&self, input_ids: &'a [usize]) -> &'a [usize] { + pub fn prefix_match<'a>(&self, input_ids: &'a [u32]) -> &'a [u32] { let mut curr = &self.root; let mut curr_idx = 0; @@ -121,7 +122,7 @@ impl RadixTree { &input_ids[..curr_idx] } - pub fn delete(&mut self, input_ids: &[usize]) { + pub fn delete(&mut self, input_ids: &[u32]) { let mut curr = &mut self.root; curr.count -= 1; diff --git a/rust/tests/test_tree.rs b/rust/tests/test_tree.rs index c9e453c10..2fbecbfa7 100644 --- a/rust/tests/test_tree.rs +++ b/rust/tests/test_tree.rs @@ -67,7 +67,7 @@ fn test_prefix_match_partial() { fn test_prefix_match_no_match() { let mut tree = RadixTree::new(); tree.insert(&[1, 2, 3, 4]); - let empty_slices: &[usize] = &[]; + let empty_slices: &[u32] = &[]; assert_eq!(tree.prefix_match(&[5, 
6, 7]), empty_slices); } @@ -124,7 +124,7 @@ fn test_delete_nonexistent() { #[test] fn test_empty_input() { let mut tree = RadixTree::new(); - let empty_slice: &[usize] = &[]; + let empty_slice: &[u32] = &[]; tree.insert(empty_slice); assert_eq!(tree.prefix_match(empty_slice), empty_slice); tree.delete(empty_slice); // Should not panic