[Feat/WIP] Add llava-onevision, with support for (1) SigLIP encoder, (2) Qwen2 decoder, and (3) OpenAI API-compatible server. (#1123)

Co-authored-by: Bo Li <drluodian@gmail.com>
This commit is contained in:
Kaichen Zhang - NTU
2024-08-24 05:11:16 +08:00
committed by GitHub
parent 5fafcac008
commit a5b14ad043
13 changed files with 703 additions and 95 deletions

View File

@@ -121,6 +121,20 @@ def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, batch_size=
if __name__ == "__main__":
    # Fetch the sample video into the user's cache directory so the script
    # has a local input file to work with.
    url = "https://raw.githubusercontent.com/EvolvingLMMs-Lab/sglang/dev/onevision_local/assets/jobs.mp4"
    cache_dir = os.path.expanduser("~/.cache")
    file_path = os.path.join(cache_dir, "jobs.mp4")
    os.makedirs(cache_dir, exist_ok=True)
    if os.path.isfile(file_path):
        # Reuse a previous download instead of hitting the network every run.
        print(f"Using cached file: {file_path}")
    else:
        partial_path = file_path + ".part"
        # The timeout (connect, read) keeps the script from hanging forever on
        # a stalled connection; streaming avoids holding the whole video in
        # memory at once.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()  # Raise an exception for bad responses
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        # Rename only after the write completed, so an interrupted download is
        # never mistaken for a valid cached file on the next run.
        os.replace(partial_path, file_path)
        print(f"File downloaded and saved to: {file_path}")
# Create the parser
parser = argparse.ArgumentParser(
description="Run video processing with specified port."
@@ -148,7 +162,7 @@ if __name__ == "__main__":
parser.add_argument(
"--video-dir",
type=str,
default="./videos/Q98Z4OTh8RwmDonc.mp4",
default=os.path.expanduser("~/.cache/jobs.mp4"),
help="The directory or path for the processed video files.",
)
parser.add_argument(