minor: Add basic editorconfig and pre-commit hooks to enforce style for whitespaces (#1926)
This commit is contained in:
18
examples/frontend_language/usage/llava_video/srt_example_llava_v.sh
Normal file → Executable file
18
examples/frontend_language/usage/llava_video/srt_example_llava_v.sh
Normal file → Executable file
@@ -33,7 +33,7 @@ CUR_NODES_IDX=$2
|
||||
|
||||
VIDEO_DIR=$3
|
||||
|
||||
MODEL_PATH=$4
|
||||
MODEL_PATH=$4
|
||||
|
||||
NUM_FRAMES=$5
|
||||
|
||||
@@ -73,16 +73,16 @@ for IDX in $(seq 1 $LOCAL_CHUNKS); do
|
||||
(
|
||||
START=$(((IDX-1) * GPUS_PER_CHUNK))
|
||||
LENGTH=$GPUS_PER_CHUNK # Length for slicing, not the end index
|
||||
|
||||
|
||||
CHUNK_GPUS=(${GPULIST[@]:$START:$LENGTH})
|
||||
|
||||
|
||||
# Convert the chunk GPUs array to a comma-separated string
|
||||
CHUNK_GPUS_STR=$(IFS=,; echo "${CHUNK_GPUS[*]}")
|
||||
|
||||
LOCAL_IDX=$((CUR_NODES_IDX * LOCAL_CHUNKS + IDX))
|
||||
|
||||
echo "Chunk $(($LOCAL_IDX - 1)) will run on GPUs $CHUNK_GPUS_STR"
|
||||
|
||||
|
||||
# Calculate the port for this chunk: 10000 plus a random offset (ports are chosen at random, not incremented by a fixed step per chunk).
|
||||
PORT=$((10000 + RANDOM % 55536))
|
||||
|
||||
@@ -92,7 +92,7 @@ for IDX in $(seq 1 $LOCAL_CHUNKS); do
|
||||
|
||||
while [ $RETRY_COUNT -lt $MAX_RETRIES ] && [ $COMMAND_STATUS -ne 0 ]; do
|
||||
echo "Running chunk $(($LOCAL_IDX - 1)) on GPUs $CHUNK_GPUS_STR with port $PORT. Attempt $(($RETRY_COUNT + 1))"
|
||||
|
||||
|
||||
#!/bin/bash
|
||||
CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR python3 srt_example_llava_v.py \
|
||||
--port $PORT \
|
||||
@@ -102,10 +102,10 @@ for IDX in $(seq 1 $LOCAL_CHUNKS); do
|
||||
--video-dir $VIDEO_DIR \
|
||||
--model-path $MODEL_PATH \
|
||||
--num-frames $NUM_FRAMES #&
|
||||
|
||||
|
||||
wait $! # Wait for the process to finish and capture its exit status
|
||||
COMMAND_STATUS=$?
|
||||
|
||||
|
||||
if [ $COMMAND_STATUS -ne 0 ]; then
|
||||
echo "Execution failed for chunk $(($LOCAL_IDX - 1)), attempt $(($RETRY_COUNT + 1)). Retrying..."
|
||||
RETRY_COUNT=$(($RETRY_COUNT + 1))
|
||||
@@ -124,8 +124,8 @@ done
|
||||
|
||||
wait
|
||||
|
||||
cat work_dirs/llava_next_video_inference_results/final_results_chunk_*.csv > work_dirs/llava_next_video_inference_results/final_results_node_${CUR_NODES_IDX}.csv
|
||||
cat work_dirs/llava_next_video_inference_results/final_results_chunk_*.csv > work_dirs/llava_next_video_inference_results/final_results_node_${CUR_NODES_IDX}.csv
|
||||
|
||||
END_TIME=$(date +%s) # Capture end time
|
||||
ELAPSED_TIME=$(($END_TIME - $START_TIME))
|
||||
echo "Total execution time: $ELAPSED_TIME seconds."
|
||||
echo "Total execution time: $ELAPSED_TIME seconds."
|
||||
|
||||
@@ -4,8 +4,8 @@ Usage:
|
||||
Show in "assistant" the desired answer format. Each "gen" term should have a stop token.
|
||||
The stream mode is not supported in speculative execution.
|
||||
|
||||
E.g.
|
||||
correct:
|
||||
E.g.
|
||||
correct:
|
||||
sgl.assistant("\nName:" + sgl.gen("name", stop="\n") + "\nBirthday:" + sgl.gen("birthday", stop="\n") + "\nJob:" + sgl.gen("job", stop="\n"))
|
||||
incorrect:
|
||||
s += sgl.assistant("\nName:" + sgl.gen("name", stop="\n"))
|
||||
|
||||
@@ -7,4 +7,4 @@ RUN git clone https://github.com/sgl-project/sglang.git
|
||||
WORKDIR /opt/sglang
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install -e "python[all]" && \
|
||||
pip install datasets
|
||||
pip install datasets
|
||||
|
||||
@@ -32,4 +32,4 @@ curl -X POST http://localhost:8000/v2/models/character_generation/generate \
|
||||
"INPUT_TEXT": ["harry"]
|
||||
}'
|
||||
|
||||
```
|
||||
```
|
||||
|
||||
@@ -21,7 +21,7 @@ def main():
|
||||
# Tokenize inputs
|
||||
tokenizer = get_tokenizer(MODEL_PATH)
|
||||
token_ids_list = [tokenizer.encode(prompt) for prompt in prompts]
|
||||
|
||||
|
||||
# Create an LLM.
|
||||
# You can also specify `skip_tokenizer_init=True`, but it requires explicit detokenization at the end
|
||||
llm = sgl.Engine(model_path=MODEL_PATH)
|
||||
@@ -36,4 +36,4 @@ def main():
|
||||
# The __main__ condition is necessary here because we use "spawn" to create subprocesses
|
||||
# Spawn starts a fresh program every time; if there is no __main__ guard, it will run into an infinite loop that keeps spawning processes from sgl.Engine
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
||||
@@ -37,4 +37,4 @@ curl -X POST http://localhost:8000/generate -H "Content-Type: application/json"
|
||||
curl -X POST http://localhost:8000/generate_stream -H "Content-Type: application/json" -d '{"prompt": "The Transformer architecture is..."}' --no-buffer
|
||||
```
|
||||
|
||||
This will send both non-streaming and streaming requests to the server.
|
||||
This will send both non-streaming and streaming requests to the server.
|
||||
|
||||
@@ -3,7 +3,7 @@ Usage:
|
||||
# Installing latest llava-next: pip install git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
|
||||
# Installing latest sglang.
|
||||
|
||||
# Endpoint Service CLI:
|
||||
# Endpoint Service CLI:
|
||||
python -m sglang.launch_server --model-path lmms-lab/llama3-llava-next-8b --port=30000
|
||||
|
||||
python3 http_llama3_llava_test.py
|
||||
|
||||
@@ -3,7 +3,7 @@ Usage:
|
||||
# Installing latest llava-next: pip install git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
|
||||
# Installing latest sglang.
|
||||
|
||||
# Endpoint Service CLI:
|
||||
# Endpoint Service CLI:
|
||||
python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --port=30000 --tp-size=8
|
||||
|
||||
python3 http_qwen_llava_test.py
|
||||
|
||||
Reference in New Issue
Block a user