minor: Add basic editorconfig and pre-commit hooks to enforce whitespace style (#1926)

2024-11-06 21:46:04 +08:00
parent 96766101b4
commit a5e0defb5a
77 changed files with 209 additions and 172 deletions
--- a/examples/frontend_language/usage/llava_video/srt_example_llava_v.sh
+++ b/examples/frontend_language/usage/llava_video/srt_example_llava_v.sh
@@ -33,7 +33,7 @@ CUR_NODES_IDX=$2

 VIDEO_DIR=$3

-MODEL_PATH=$4   
+MODEL_PATH=$4

 NUM_FRAMES=$5

@@ -73,16 +73,16 @@ for IDX in $(seq 1 $LOCAL_CHUNKS); do
    (
        START=$(((IDX-1) * GPUS_PER_CHUNK))
        LENGTH=$GPUS_PER_CHUNK # Length for slicing, not the end index
-        
+
        CHUNK_GPUS=(${GPULIST[@]:$START:$LENGTH})
-        
+
        # Convert the chunk GPUs array to a comma-separated string
        CHUNK_GPUS_STR=$(IFS=,; echo "${CHUNK_GPUS[*]}")

        LOCAL_IDX=$((CUR_NODES_IDX * LOCAL_CHUNKS + IDX))

        echo "Chunk $(($LOCAL_IDX - 1)) will run on GPUs $CHUNK_GPUS_STR"
-        
+
        # Calculate the port for this chunk. Ensure it's incremented by 5 for each chunk.
        PORT=$((10000 + RANDOM % 55536))

@@ -92,7 +92,7 @@ for IDX in $(seq 1 $LOCAL_CHUNKS); do

        while [ $RETRY_COUNT -lt $MAX_RETRIES ] && [ $COMMAND_STATUS -ne 0 ]; do
            echo "Running chunk $(($LOCAL_IDX - 1)) on GPUs $CHUNK_GPUS_STR with port $PORT. Attempt $(($RETRY_COUNT + 1))"
-            
+
 #!/bin/bash
            CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR python3 srt_example_llava_v.py \
            --port $PORT \
@@ -102,10 +102,10 @@ for IDX in $(seq 1 $LOCAL_CHUNKS); do
            --video-dir $VIDEO_DIR \
            --model-path $MODEL_PATH \
            --num-frames $NUM_FRAMES #&
-            
+
            wait $!  # Wait for the process to finish and capture its exit status
            COMMAND_STATUS=$?
-            
+
            if [ $COMMAND_STATUS -ne 0 ]; then
                echo "Execution failed for chunk $(($LOCAL_IDX - 1)), attempt $(($RETRY_COUNT + 1)). Retrying..."
                RETRY_COUNT=$(($RETRY_COUNT + 1))
@@ -124,8 +124,8 @@ done

 wait

-cat work_dirs/llava_next_video_inference_results/final_results_chunk_*.csv > work_dirs/llava_next_video_inference_results/final_results_node_${CUR_NODES_IDX}.csv   
+cat work_dirs/llava_next_video_inference_results/final_results_chunk_*.csv > work_dirs/llava_next_video_inference_results/final_results_node_${CUR_NODES_IDX}.csv

 END_TIME=$(date +%s)  # Capture end time
 ELAPSED_TIME=$(($END_TIME - $START_TIME))
-echo "Total execution time: $ELAPSED_TIME seconds."
+echo "Total execution time: $ELAPSED_TIME seconds."
--- a/examples/frontend_language/usage/openai_chat_speculative.py
+++ b/examples/frontend_language/usage/openai_chat_speculative.py
@@ -4,8 +4,8 @@ Usage:
 Show in "assistant" the desired answer format. Each "gen" term should have a stop token.
 The stream mode is not supported in speculative execution.

-E.g. 
-correct: 
+E.g.
+correct:
    sgl.assistant("\nName:" + sgl.gen("name", stop="\n") + "\nBirthday:" + sgl.gen("birthday", stop="\n") + "\nJob:" + sgl.gen("job", stop="\n"))
 incorrect:
    s += sgl.assistant("\nName:" + sgl.gen("name", stop="\n"))
--- a/examples/frontend_language/usage/triton/Dockerfile
+++ b/examples/frontend_language/usage/triton/Dockerfile
@@ -7,4 +7,4 @@ RUN git clone https://github.com/sgl-project/sglang.git
 WORKDIR /opt/sglang
 RUN pip install --upgrade pip && \
    pip install -e "python[all]" && \
-    pip install datasets
+    pip install datasets
--- a/examples/frontend_language/usage/triton/README.md
+++ b/examples/frontend_language/usage/triton/README.md
@@ -32,4 +32,4 @@ curl -X POST http://localhost:8000/v2/models/character_generation/generate \
  "INPUT_TEXT": ["harry"]
 }'

-```
+```
--- a/examples/runtime/engine/input_ids.py
+++ b/examples/runtime/engine/input_ids.py
@@ -21,7 +21,7 @@ def main():
    # Tokenize inputs
    tokenizer = get_tokenizer(MODEL_PATH)
    token_ids_list = [tokenizer.encode(prompt) for prompt in prompts]
-    
+
    # Create an LLM.
    # You can also specify `skip_tokenizer_init=True`, but it requires explicit detokenization at the end
    llm = sgl.Engine(model_path=MODEL_PATH)
@@ -36,4 +36,4 @@ def main():
 # The __main__ condition is necessary here because we use "spawn" to create subprocesses
 # Spawn starts a fresh program every time, if there is no __main__, it will run into infinite loop to keep spawning processes from sgl.Engine
 if __name__ == "__main__":
-    main()
+    main()
--- a/examples/runtime/engine/readme.md
+++ b/examples/runtime/engine/readme.md
@@ -37,4 +37,4 @@ curl -X POST http://localhost:8000/generate  -H "Content-Type: application/json"
 curl -X POST http://localhost:8000/generate_stream  -H "Content-Type: application/json"  -d '{"prompt": "The Transformer architecture is..."}' --no-buffer
 ```

-This will send both non-streaming and streaming requests to the server.
+This will send both non-streaming and streaming requests to the server.
--- a/examples/runtime/llava_onevision/http_llama3_llava_test.py
+++ b/examples/runtime/llava_onevision/http_llama3_llava_test.py
@@ -3,7 +3,7 @@ Usage:
 # Installing latest llava-next: pip install git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
 # Installing latest sglang.

-# Endpoint Service CLI: 
+# Endpoint Service CLI:
 python -m sglang.launch_server --model-path lmms-lab/llama3-llava-next-8b --port=30000

 python3 http_llama3_llava_test.py
--- a/examples/runtime/llava_onevision/http_qwen_llava_test.py
+++ b/examples/runtime/llava_onevision/http_qwen_llava_test.py
@@ -3,7 +3,7 @@ Usage:
 # Installing latest llava-next: pip install git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
 # Installing latest sglang.

-# Endpoint Service CLI: 
+# Endpoint Service CLI:
 python -m sglang.launch_server --model-path lmms-lab/llava-next-72b --port=30000 --tp-size=8

 python3 http_qwen_llava_test.py