Improve linear.py to load sharded weights & remove the dependency on Parameters from vllm (#2784)

Co-authored-by: SangBin Cho <rkooo567@gmail.com>
2025-01-07 23:29:10 -08:00
parent 694e41925e
commit 8a6906127a
15 changed files with 655 additions and 88 deletions
--- a/scripts/killall_sglang.sh
+++ b/scripts/killall_sglang.sh
@@ -7,6 +7,7 @@ nvidia-smi
 kill -9 $(ps aux | grep 'sglang::' | grep -v 'grep' | awk '{print $2}') 2>/dev/null
 kill -9 $(ps aux | grep 'sglang.launch_server' | grep -v 'grep' | awk '{print $2}') 2>/dev/null
 kill -9 $(ps aux | grep 'sglang.bench' | grep -v 'grep' | awk '{print $2}') 2>/dev/null
+kill -9 $(ps aux | grep 'sglang.data_parallel' | grep -v 'grep' | awk '{print $2}') 2>/dev/null

 # Clean all GPU processes if any argument is provided
 if [ $# -gt 0 ]; then