Higher priority for user input of max_prefill_tokens & format (#540)
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
"""Launch the inference server for Llava-video model."""
|
||||
|
||||
import argparse
|
||||
import multiprocessing as mp
|
||||
|
||||
|
||||
Reference in New Issue
Block a user