Files
sglang/docs/advanced_features/vlm_query.ipynb

543 lines
994 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"id": "0",
"metadata": {},
"source": [
2025-08-10 19:49:45 -07:00
"# Query Vision Language Model"
]
},
{
"cell_type": "markdown",
"id": "1",
"metadata": {},
"source": [
"## Querying Qwen-VL"
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "2",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply() # Run this first.\n",
"\n",
"model_path = \"Qwen/Qwen2.5-VL-3B-Instruct\"\n",
"chat_template = \"qwen2-vl\""
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<|im_start|>system\n",
"You are a helpful assistant.<|im_end|>\n",
"<|im_start|>user\n",
"What's shown here: <|vision_start|><|image_pad|><|vision_end|>?<|im_end|>\n",
"<|im_start|>assistant\n",
"\n"
]
},
{
"data": {
"image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAF8AjoDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDyDRuNQLHnCmur4POccdMVymijN8/H8NdUM7c9+lSNDkwpAHUU7Py4xk5poOeaeAOooGchrCs2qTDPAx/KqHlNj/GtnUULalMcZ5FReQOoHFYTnZm8Kd1cyxGynnj8KcIcirssOGzihEPpxilzh7LUqrD1AFO8sjg8VbRDycHikeMZzS5xuFkZE6gynPpQsSuRlsVJd/LORx0FRpksBW6bsczVmWLWDDO3opxW5oq7bJzz98/yFZkK7YXI/umtbRxnS29fNP8AIVSEbGn6ounTRTHnaM1l3Wo3WuX8zeaY7fPIJ61R1FijKDwp4yelTaSvlpjgjrmlbW4/UqRzvHHK4iUIGOAg5GD+VOt7+EvuB+Y+tWH024SzKx/NnqAaxYbeWO5USRuvXqKaIubfmozbumV4708RkLkEEEckVj42OdjFfXB4qb7SyHh1f6jB/wAKHJpm9OTS0LoGXXI4zUN+eV+tJHexORuyG9xS3GLhVZGB/Hincmo7s1fDij5zjOZFFbsgJkYjj5jWJ4cG1iCRzICMGttyA59cmlclDZsCCTj+E/yrnrvixjx3x/KugmH+iy8n7h/lWBdrmxi46YpoUiSIf8SzHoppmmDFu/1qaMH+y+n8BqLSz+5k/wB6mSQ2qD7RMf8AZP8AOqmnpu1KIf8ATTmrtlzNKcfw1X0tN2qRZP8AETUsEdmMLaxAen9abMP9ElXPVTUihWto8ggbev40yZSlq5wPu0It7HJwXt3aTSxxklFHNaFrrkD2rRshBboRVOBAYLuU4+Ykc1E8KnRQxUEjpxyOaZFjoY5o5NORI5EdicEA4I/CtRPk0/bzzdR/+gmuCsYJ3hkk84hV6A1paVr9zcTQ2c3KGUSZ75xikwSOqnYGU1kaq37xB6o39K1HYFzz371kaoMzLjtEaRT2M1OYWxx8wFKwP2UA/wATE/lxSD5YSfVv6VI/+qjXvg/zp7akI6zRDs0mEd+f51o2uAxQFlQjIO7O3ntVDRbeSS3tokyPlJDYztINaPlSW7AyKimRSSg4HBrWnWppqDep9dl940kr7l7eu3e/LHoxH8/SuT0P994zhI/57E5/Ouh85DCSWKnacE9TVDQdFu7PxNbXMwjMTlipVwex7VrWeyOfOZXpxGa6c6kx9Zz/AOgios7UJ/2TRq/z34I/57Of/HRSN/qnwf4c5rm6nziMiKMzzHjqa6Kzh8qCQ+ik1m6fb4Y8VuEbLGZvRG/lSZn1MLRh+5JHpWzqExhs4HABO6sjRxi3/KtXUcNFaRk43E8+lCNeg3SLn7WZywPyYHt3rN8Su63q+X5mQn8A4rV0zEbXATBAIGRVa+uIv7SuEmdV2oCMnrQviBbFrRVaPR4t+dxJ4asK/QvqE+IXOX4OeK6KxYSafER0NYMt7DuuFKuZPNIX5PehbgdLFhLFB0IUcfhWWl38oHkHBIG7PFakxKWhPohP5CuatLyV/stuEIYuNxLD1oWojor077KRegKkZ+vFc3Y6OsN9bz72/dtxW/qoKaZcHPO3j86xNPvWn1OCBmi+UZ+U5zxRHYbN27keG3eWGWSF3wrmNyuR7+tZOn2Pn6tbPjdcM21c1oauGOnkK2CSP51m+H7/AD4gtnklDiNl4C44zRF3QmrHQazBdaG0kcg8udcZANZVvDanUBsSOK5ILFAMBs+nv7dK2PG2sPP5k3y/JLtXA52n/wDV+tYGg6xcXV2UmiSaILn99GM/gQKaWgr6mhqDBbQnPBIqvH5SX8KJg5XeRnmk8UXMR09ykLfLKvyseq1k+Hpkn1fYsXRDzR0H1N3VZAtk5f5VyBzVOxK3t9CYWBji5kf+FcjofetjUoUltD5uBGDlifT2rLtJ0lvI4YE8uFclEC4/EnuaIvQOpvrOkbDy081wPvyDj8F/qah1G7unu/K+0SbPl+UNgfpUXmosgRidw7bTUdyGku3uId4LMp5Q9hj1pJjtoM1eALp7yHqOhFcq2lx3Ukf2olvm6ZrqpLkyadLb3bLJOQ2xlGEDdV3DrgCq+mac0FqpdvMaTlsoML9KadkSONpDZ2Dw28YjXvisY6bbZPy/+O1ryxu96YpJ3ERTIiwBg59fSs2RJxK+2/lxuOPkX/CiyGee6MQL1/8Adrqsjb37c1ymjAm8fnjbXVc54GRUjQ5Qd+egpx56HimLyByc1JwTz+FMZgXuBfzHBPPaod5CYCmrt0n+lSkDnNROg2kY7da4ZS1Z3wi+VFX5mHTpQkJC8sKmjjBZvSpxGB8uMkVPMUoXK3lYHDE/hUbx/Ly1XduecGoZE3E5pqQpwVjAvQBdYGegpIk+bNSXw/07A9BToV55rtjsjgnuy0oIt5P92tjQUB0pu370/wAhWQ3Fu/0ra0Aj+zcYP32NCJRZlsEuItsnNRi0EDFQOAK1YgNvPX0qO5TOTjtTG1oV0GLfp1BqK2QNMAVyMd6n2stuMN271DZ7hLkrng8ipZkR3WnW0gOY8E9xWXNo2P8AVS59nrenZSSOnHQ1CE3AkjI9M0OVtzopuyObFhPFOuUyB3HNVfJb7cBnjPY4rrVRVmTnPtipLPThd6mMp0OacZ3IqFTRYpba+Mb5JJX8ARmttic9cjNMljVPEkygcKyj8lpzHnPTjpTJi7oZcHFnLzn5W/lWHPteyRVbLLjPtWxqJxpdy3/TM1y8e+GwSYOxbbnB5FNMJGtGD/Z+CDjGCajsXhiVwxkOemxcmqVrfyzW7Fk+QZDYOcfgasWN3bqrbHyG55pki2WBcXAHoe1Q6Sf+JnGcdGY1PbrsmlckAMOOah0cf8TNfYNQ9ho7DcBBGBx8oqG8YLYXBJ6KamYgIg77BVTUeNMnJx92kiuhhp8mjMe7Hn3odduiA+v+NOn+TSYlHei4G3R1XHpTIIohs0OVx1INM0OJTqkYx0B/lU2P+JE2O+f50/w6gfUlJHRGpMEdG5+cg+tc9rl/Ja3sYVdymP8ArXQuMyE8AE965jxEubtc/wBwChIp7DI762mXYf3bDrk1Z8sOybGDKo6j/CsO4hG7pnIB/SmxyzQLuSQgDsadl1JR614anWG0RHfOUJKD+Hmr1/MqxHYUJ6Ekc1w+i6jcGy3uck/LkVrpPJcLLcOhAOFyWH8q4Y4OTre0b0PrMFRtCMm9LF0uu0sVPTqKzfBZd/ExbcSFikOc1P5o2H5T93uaj8DLnWLqTssDV6dR3scmcaxTHX7br1T6vIf1AoQAnaxwDxkimXWWvU
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAF8CAIAAABJw4Z7AAEAAElEQVR4AZT9a4+sS3Yf+GXdsjLrvu/7XPp0N5tkNylRw6FFUdbAhq2xAb+yMBhAX0TfRB/EhuGx/UKAPZaAgYQRNZQgUaSazWafPtd99qXulVlVWeXff63MZ9c5pzXGxK79ZDwRK1asWLFirbg/a89fPL69vb27u/Pc3Nz84IMPPnz5Ac/GxsY6dz8Stba25nV0f79YLK5vbjpW4P39PZCtrS0h19fXA6Rw2DzBLEZrV1dXP//5X15fzyW/u79eu7sf+b9YjEajdf+5Bdz3/vPe1XO0vsZ/v74GPz93c3OTH0nX1lAV5MDX7hf3dyDhQe/GWoGKK8ICqQRrhWotSe7X1hejjdv70Wx+M5/PbxfoX7sbbd4tFtvjMTxwBuwuxKfA64ov1Whzcy2e+9H+/v7Ozg5i5vPrwGxs/N7v/d7W5jYOJK+7u5vZfGtz/fDw0Nv92t2jxwcffIypL27vFq9evf7yq1dXlxKunZ1dfP7lq1evvjo/PkHg3t7eZDKBHM2Xl1c8s9kMD6/noWQ8Hu/u7m5vT9fWNy9nNxsbW3diLq7W7+6mW+Nt6W8Xa9eLtYVaRPbaZLwjyf3t/dX11f3a4nZtAe1ibXR9v7he3N5tbWxsb413pl5vVeji9uLqUh3JaHN9bQOPE7x4+vTpJ598olzY9+jRI+SdnJzc3dyqbhWNV1sbm5Is7m4UvGsNN9C8trn12eefv3nz7vHTJ5gPXuDbt2/nN35v725vlO7Zs2fb29vYCJWSHh8fX15ewinw6OgIAFRo4I5PT8SO0HR7O51OX758SUYkef369fn5uWCESYiSJ0+efPTRR+uje7x6/uwZzMqm4POrmVLMr29QMiLUmypWOdcvZlef/vrXQb6xDrN8FZNHQrKB4Ob8wcGBGkfJ2dkZIk/Pz5D36PBI4P1ogW8XFxfo2ZtOhcjr+fPnf/eP/+if/bN/Bgl6ttfHiql1pLC372V+o9hIeq/ms1//+tc7e3vzm2v4L+dz1CqQGtzbmSg15EqBGPRfXsyfPnm+vTX5+utv8FKrOFhbezze+ZOf/fFHO0/355P92drhbGP3enT+2ZeH4+mL5880GA1P4uu1tb29g6P9o5vL+eX51exyProZjYjt7sb13ugX16/++S//9D9dv341Ors/mC427//+/+qP/6v/7f/m/PLsl7/6mz//83//4Ycffv75r9+9e7e7M8XhIpIMbCGMOKiF9bXNs4vzk8vTi5Rilta04aFxIn9td3eHd68SPjo8UBaSpnT/4//wP6j03WmYfLizD/NkMwK2vT3e3plGjDfWT05O3x6/29iUyeSO6tAu73Dy/mhvl3rQWiMb421VezW/+etffnp1Nb9b39jcmKxvjM7P3u1sjz/54Q/Icymwxc1dNIk2iO1fffkKto2NMQw3t3cbG2tPnzz50Y8+IQYp4HrUDhWhKr/55ht0Juv19e3pBIXqdHS3+PDZ0zXEbKT5X9/cgx+tb/2rf/WvtHRlRD8wMgbb3/pbv/eH/+Xf2Rxvnp4ef/nl1yT8v/zDv/vTn/708uL6s88++w//4T9i29u3rz/Xdo7fvXjx4vd+76cvnz8d3VxNtrcuFexu8fzZBwdHh3c03/39f//f/3OpFjfXsqBwfsh9/IOb2+sS4Cg23N7cmpLPV9+8Qe3d2ihlGd2Nx5ulOReH+wePnxyNFmnRt3d0yd1obVMb//Kr10T60dO0oPHGplK/ff0Gf37w8Yeb441n7MX9glaF//YmTXq8uaXuPvjgI0qPXt3YIr2jk7Oz12/f3N7Mz07erqWG0mZnVzez2fXVDEtu19c34MdYrVhbw975fDbB9Y27/YMpjUn5YAKuHr87JXV/9de/0AY3peGUXx5qUXQ/lYFnxL60/SB1RAMMfVRg/O0AtOtXsQ0gkAcCSlROCka18VOHRI1dCnwBry1/wgJCLMtElcWSKxDMaUAQ8SR6lCdDUrHro1DOuiW2qImnABFNuQoDnmclXk+NSdNo1X+K1o5Ml0eigJJ4lFZZAK8TskaLaWIrKwUMl/Isq7+xHq5SzB5qhl8Syo7Uvvnm9fHJueZ0cXF1cT5T1J/89m9fnJ8V/hBMrHFeUaSC36somfLjoZJU+I2MmFZgrOTm2oZiMVbrG+tb2KBksZt3C6bk7k40V9IFR1UiSSmEef8NLvlKAjkwDheQEe6WE9ulXuhlrCBDTyEHFjYNwMu6WiaREBg3eAacHf79ZwNAydNoU4crtKuQUCStV+orz3QW3ufFryAAMATb2Cy4+jX65a4EuwRHvwH/mwxJ+D0lD/MLv5YpUAh5xnUOTk6swHZDYMLfS1bIBtDIG/5hqg5HeZeun+AhaT+t2Bg6uw4cbYwoiPWtDWpd9+jufoMtTivaGp9cnF9/eUPdTA8PxuPp/PLq8ptvtta37q8X0FL9453ttc31642b2ShdSQQgUK+PNmpiBH6HYK/tGuDhU/jw2rSppXiq09n8r5pZClKXRRIebkjbeJSRRzgpxHBKeWNzzMPYdFTsS3rYqU3h6gWxF4z/bHZ+fnGrca/NdGwvzo8XO1NqUSo92mRU8kOByrFritYZiEcnIkUJuU+DjjYCFqtQPYzQWqIIcnMrLZ2odZGqrnR99QKq4CVvzUPIeAjb7HqG87prtMG/+3f/7t//+39PG8yurpUSgKzZm90yQhqduru9u4Yt3NtYZ890CBZ36d3+g3/wD/7Nv/k356cnjETKPJuxMU+ePoYk9BkQ0ACVIy6l7CGfoGjL6+k2lQJEFb6noOUyDighlx0cug76vgze6ekpYridnSn/waNDfXw2RlodshfPnsOsFFTcp7/+XP9SMS9nM6Rubqx99MFz9S/TGPu79ZubGAI49d/kFYbMZmmD1VXVV/nkBy9397Z1PfUdoV1oZOW8prqbUBQLbGHyTHmXFZYydwLRKXZVZziyantdE57ckJYHcIDubmPzmCrNW4PLYEhebR/ST4hIV5vGK10AqQQFF4Usi8WyreIfMshb6EzWNRIquVE1mC/eQCBQdHpVAy9DQ3LyD8K8i4+YAoIDHqzcWNuQokKSGuVqL4hjEGKuGFuUSFOmJLYkyCrEs13hDNleEYEAou5VgVN5lzP9WQJqTPDNN29PT87n85vx1nTvYFd5r28zYN2e0CsZKMyO5/qnqiqYo1nzDE9SjtF4ewu7jLmQQTUj+yZSeY23irK+mbbDrK3huMwVJXKZIqf44cL9xiajuH4XY4PFiCxPeAYsxg39cCOppRYB/CGmihxULd+FbmscyQ4t1UTjKZILPKi4hk/eK43WflGdqqAC5vU7TlQVdGmE+IE1zkQlch0DBPKgE09aRDEWqs6oMQtRHcXSMCOvBpf6yBtaUZqN9kaV4DxUNIInf+spTaXq8RI8wdidZvyqecOgSRNdwOiRI49AyDvT7xcnQEVYRwF7CNzEC2xsorj471W6Pmn4jJhOq34XanpjcbuhG7i4vl+b34/m92vj0fqjF8/0Wgnh9fr9Nq6MN2+u7mc381evX+9tTXYmu1sbYz2cG6q4DLzyoVhGCGjZwkxktGuCB/8y9wc/opSpqS1/ar4FwXC2OQkcTs4rJ7SLBnJwnZHwAbcofsw3gwCzCQMhRlc6wDyopYOFF1gIwBx1NF/cLW5n9A4GTMfpXkDSqKoZtWAmOXo0vCTMXMtIjQvpWqBoZKNcTYzkojwRX+GlKyrrxtxgSAVGPIDxCGRKeTi07ey1gOUVlXQCA4DgzY0lPE092d1hKiJga2usV5QisdUJ2dg4P7s4PbsgqI8ePTGvg4xf/fKvT2azH/zgB4+ePnnz7q3xaXRPdbUVqjkjIYTh+Rp1NL++jV1swb67SelAcqV9w/kuPhhawZMOvLmdI3Jvnb5aO377FvFPnzGOT6fbE0aUpTRgYpBMFzFX4/EEZzWl/b0dRUY5ToQha8bNo/OLTOSkHZV8AGNSDL4lf/zk4IOXT3Z2Js0xjK9OCdJGu7v7ChtutsOadnAV8Us7IRBAP0WF+pU+Ei6kXSP
"text/plain": [
"<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=570x380>"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Lets create a prompt.\n",
"\n",
"from io import BytesIO\n",
"import requests\n",
"from PIL import Image\n",
"\n",
"from sglang.srt.conversation import chat_templates\n",
"\n",
"image = Image.open(\n",
" BytesIO(\n",
" requests.get(\n",
" \"https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true\"\n",
" ).content\n",
" )\n",
")\n",
"\n",
"conv = chat_templates[chat_template].copy()\n",
"conv.append_message(conv.roles[0], f\"What's shown here: {conv.image_token}?\")\n",
"conv.append_message(conv.roles[1], \"\")\n",
"conv.image_data = [image]\n",
"\n",
"print(conv.get_prompt())\n",
"image"
]
},
{
"cell_type": "markdown",
2025-08-10 19:49:45 -07:00
"id": "4",
"metadata": {},
"source": [
2025-08-10 19:49:45 -07:00
"### Query via the offline Engine API"
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You have video processor config saved in `preprocessor.json` file which is deprecated. Video processor configs should be saved in their own `video_preprocessor.json` file. You can rename the file or load and save the processor back which renames it automatically. Loading from `preprocessor.json` will be removed in v5.0.\n",
"You have video processor config saved in `preprocessor.json` file which is deprecated. Video processor configs should be saved in their own `video_preprocessor.json` file. You can rename the file or load and save the processor back which renames it automatically. Loading from `preprocessor.json` will be removed in v5.0.\n",
"Loading safetensors checkpoint shards: 0% Completed | 0/2 [00:00<?, ?it/s]\n",
"Loading safetensors checkpoint shards: 50% Completed | 1/2 [00:03<00:03, 3.13s/it]\n",
"Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:06<00:00, 3.27s/it]\n",
"Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:06<00:00, 3.25s/it]\n",
"\n",
"Capturing batches (bs=1 avail_mem=21.63 GB): 100%|██████████| 35/35 [00:10<00:00, 3.19it/s] \n"
]
}
],
"source": [
"from sglang import Engine\n",
"\n",
"llm = Engine(\n",
" model_path=model_path, chat_template=chat_template, mem_fraction_static=0.8\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In the picture, a person in a yellow shirt is hanging laundry on a clothesline attached to the back of a yellow taxi in an urban setting. There are city streets, buildings, and traffic lights visible in the background. The scene appears to be incongruous and amusing, as it shows an unusual and somewhat chaotic activity happening in a busy city environment.\n"
]
}
],
"source": [
"out = llm.generate(prompt=conv.get_prompt(), image_data=[image])\n",
"print(out[\"text\"])"
]
},
{
"cell_type": "markdown",
2025-08-10 19:49:45 -07:00
"id": "7",
"metadata": {},
"source": [
2025-08-10 19:49:45 -07:00
"### Query via the offline Engine API, but send precomputed embeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "8",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7c94dead4660409c9acfac1f3461d7d9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Compute the image embeddings using Huggingface.\n",
"\n",
"from transformers import AutoProcessor\n",
"from transformers import Qwen2_5_VLForConditionalGeneration\n",
"\n",
"processor = AutoProcessor.from_pretrained(model_path, use_fast=True)\n",
"vision = (\n",
" Qwen2_5_VLForConditionalGeneration.from_pretrained(model_path).eval().visual.cuda()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The image shows a scene with two yellow taxis in an urban setting. The taxi on the left has a red light on top, indicating that it may be waiting or preparing to drive. The other taxi, which is facing left, has its hatch open with some clothing or fabric hanging out. The background features high-rise buildings and city streets, suggesting this is taking place in a downtown area of a city. The presence of multiple flags on flagpoles indicates that there might be some celebration or event within the vicinity.\n"
]
}
],
"source": [
"processed_prompt = processor(\n",
" images=[image], text=conv.get_prompt(), return_tensors=\"pt\"\n",
")\n",
"input_ids = processed_prompt[\"input_ids\"][0].detach().cpu().tolist()\n",
"precomputed_embeddings = vision(\n",
" processed_prompt[\"pixel_values\"].cuda(), processed_prompt[\"image_grid_thw\"].cuda()\n",
")\n",
"\n",
"mm_item = dict(\n",
" modality=\"IMAGE\",\n",
" image_grid_thw=processed_prompt[\"image_grid_thw\"],\n",
" precomputed_embeddings=precomputed_embeddings,\n",
")\n",
"out = llm.generate(input_ids=input_ids, image_data=[mm_item])\n",
"print(out[\"text\"])"
]
},
{
"cell_type": "markdown",
2025-08-10 19:49:45 -07:00
"id": "10",
"metadata": {},
"source": [
2025-08-10 19:49:45 -07:00
"## Querying Llama 4 (Vision)"
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "11",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply() # Run this first.\n",
"\n",
"model_path = \"meta-llama/Llama-4-Scout-17B-16E-Instruct\"\n",
"chat_template = \"llama-4\""
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "12",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<|header_start|>user<|header_end|>\n",
"\n",
"What's shown here: <|image|>?<|eot|><|header_start|>assistant<|header_end|>\n",
"\n",
"\n",
"Image size: (570, 380)\n"
]
},
{
"data": {
"image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAF8AjoDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDyDRuNQLHnCmur4POccdMVymijN8/H8NdUM7c9+lSNDkwpAHUU7Py4xk5poOeaeAOooGchrCs2qTDPAx/KqHlNj/GtnUULalMcZ5FReQOoHFYTnZm8Kd1cyxGynnj8KcIcirssOGzihEPpxilzh7LUqrD1AFO8sjg8VbRDycHikeMZzS5xuFkZE6gynPpQsSuRlsVJd/LORx0FRpksBW6bsczVmWLWDDO3opxW5oq7bJzz98/yFZkK7YXI/umtbRxnS29fNP8AIVSEbGn6ounTRTHnaM1l3Wo3WuX8zeaY7fPIJ61R1FijKDwp4yelTaSvlpjgjrmlbW4/UqRzvHHK4iUIGOAg5GD+VOt7+EvuB+Y+tWH024SzKx/NnqAaxYbeWO5USRuvXqKaIubfmozbumV4708RkLkEEEckVj42OdjFfXB4qb7SyHh1f6jB/wAKHJpm9OTS0LoGXXI4zUN+eV+tJHexORuyG9xS3GLhVZGB/Hincmo7s1fDij5zjOZFFbsgJkYjj5jWJ4cG1iCRzICMGttyA59cmlclDZsCCTj+E/yrnrvixjx3x/KugmH+iy8n7h/lWBdrmxi46YpoUiSIf8SzHoppmmDFu/1qaMH+y+n8BqLSz+5k/wB6mSQ2qD7RMf8AZP8AOqmnpu1KIf8ATTmrtlzNKcfw1X0tN2qRZP8AETUsEdmMLaxAen9abMP9ElXPVTUihWto8ggbev40yZSlq5wPu0It7HJwXt3aTSxxklFHNaFrrkD2rRshBboRVOBAYLuU4+Ykc1E8KnRQxUEjpxyOaZFjoY5o5NORI5EdicEA4I/CtRPk0/bzzdR/+gmuCsYJ3hkk84hV6A1paVr9zcTQ2c3KGUSZ75xikwSOqnYGU1kaq37xB6o39K1HYFzz371kaoMzLjtEaRT2M1OYWxx8wFKwP2UA/wATE/lxSD5YSfVv6VI/+qjXvg/zp7akI6zRDs0mEd+f51o2uAxQFlQjIO7O3ntVDRbeSS3tokyPlJDYztINaPlSW7AyKimRSSg4HBrWnWppqDep9dl940kr7l7eu3e/LHoxH8/SuT0P994zhI/57E5/Ouh85DCSWKnacE9TVDQdFu7PxNbXMwjMTlipVwex7VrWeyOfOZXpxGa6c6kx9Zz/AOgios7UJ/2TRq/z34I/57Of/HRSN/qnwf4c5rm6nziMiKMzzHjqa6Kzh8qCQ+ik1m6fb4Y8VuEbLGZvRG/lSZn1MLRh+5JHpWzqExhs4HABO6sjRxi3/KtXUcNFaRk43E8+lCNeg3SLn7WZywPyYHt3rN8Su63q+X5mQn8A4rV0zEbXATBAIGRVa+uIv7SuEmdV2oCMnrQviBbFrRVaPR4t+dxJ4asK/QvqE+IXOX4OeK6KxYSafER0NYMt7DuuFKuZPNIX5PehbgdLFhLFB0IUcfhWWl38oHkHBIG7PFakxKWhPohP5CuatLyV/stuEIYuNxLD1oWojor077KRegKkZ+vFc3Y6OsN9bz72/dtxW/qoKaZcHPO3j86xNPvWn1OCBmi+UZ+U5zxRHYbN27keG3eWGWSF3wrmNyuR7+tZOn2Pn6tbPjdcM21c1oauGOnkK2CSP51m+H7/AD4gtnklDiNl4C44zRF3QmrHQazBdaG0kcg8udcZANZVvDanUBsSOK5ILFAMBs+nv7dK2PG2sPP5k3y/JLtXA52n/wDV+tYGg6xcXV2UmiSaILn99GM/gQKaWgr6mhqDBbQnPBIqvH5SX8KJg5XeRnmk8UXMR09ykLfLKvyseq1k+Hpkn1fYsXRDzR0H1N3VZAtk5f5VyBzVOxK3t9CYWBji5kf+FcjofetjUoUltD5uBGDlifT2rLtJ0lvI4YE8uFclEC4/EnuaIvQOpvrOkbDy081wPvyDj8F/qah1G7unu/K+0SbPl+UNgfpUXmosgRidw7bTUdyGku3uId4LMp5Q9hj1pJjtoM1eALp7yHqOhFcq2lx3Ukf2olvm6ZrqpLkyadLb3bLJOQ2xlGEDdV3DrgCq+mac0FqpdvMaTlsoML9KadkSONpDZ2Dw28YjXvisY6bbZPy/+O1ryxu96YpJ3ERTIiwBg59fSs2RJxK+2/lxuOPkX/CiyGee6MQL1/8Adrqsjb37c1ymjAm8fnjbXVc54GRUjQ5Qd+egpx56HimLyByc1JwTz+FMZgXuBfzHBPPaod5CYCmrt0n+lSkDnNROg2kY7da4ZS1Z3wi+VFX5mHTpQkJC8sKmjjBZvSpxGB8uMkVPMUoXK3lYHDE/hUbx/Ly1XduecGoZE3E5pqQpwVjAvQBdYGegpIk+bNSXw/07A9BToV55rtjsjgnuy0oIt5P92tjQUB0pu370/wAhWQ3Fu/0ra0Aj+zcYP32NCJRZlsEuItsnNRi0EDFQOAK1YgNvPX0qO5TOTjtTG1oV0GLfp1BqK2QNMAVyMd6n2stuMN271DZ7hLkrng8ipZkR3WnW0gOY8E9xWXNo2P8AVS59nrenZSSOnHQ1CE3AkjI9M0OVtzopuyObFhPFOuUyB3HNVfJb7cBnjPY4rrVRVmTnPtipLPThd6mMp0OacZ3IqFTRYpba+Mb5JJX8ARmttic9cjNMljVPEkygcKyj8lpzHnPTjpTJi7oZcHFnLzn5W/lWHPteyRVbLLjPtWxqJxpdy3/TM1y8e+GwSYOxbbnB5FNMJGtGD/Z+CDjGCajsXhiVwxkOemxcmqVrfyzW7Fk+QZDYOcfgasWN3bqrbHyG55pki2WBcXAHoe1Q6Sf+JnGcdGY1PbrsmlckAMOOah0cf8TNfYNQ9ho7DcBBGBx8oqG8YLYXBJ6KamYgIg77BVTUeNMnJx92kiuhhp8mjMe7Hn3odduiA+v+NOn+TSYlHei4G3R1XHpTIIohs0OVx1INM0OJTqkYx0B/lU2P+JE2O+f50/w6gfUlJHRGpMEdG5+cg+tc9rl/Ja3sYVdymP8ArXQuMyE8AE965jxEubtc/wBwChIp7DI762mXYf3bDrk1Z8sOybGDKo6j/CsO4hG7pnIB/SmxyzQLuSQgDsadl1JR614anWG0RHfOUJKD+Hmr1/MqxHYUJ6Ekc1w+i6jcGy3uck/LkVrpPJcLLcOhAOFyWH8q4Y4OTre0b0PrMFRtCMm9LF0uu0sVPTqKzfBZd/ExbcSFikOc1P5o2H5T93uaj8DLnWLqTssDV6dR3scmcaxTHX7br1T6vIf1AoQAnaxwDxkimXWWvU
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAF8CAIAAABJw4Z7AAEAAElEQVR4AZT9a4+sS3Yf+GXdsjLrvu/7XPp0N5tkNylRw6FFUdbAhq2xAb+yMBhAX0TfRB/EhuGx/UKAPZaAgYQRNZQgUaSazWafPtd99qXulVlVWeXff63MZ9c5pzXGxK79ZDwRK1asWLFirbg/a89fPL69vb27u/Pc3Nz84IMPPnz5Ac/GxsY6dz8Stba25nV0f79YLK5vbjpW4P39PZCtrS0h19fXA6Rw2DzBLEZrV1dXP//5X15fzyW/u79eu7sf+b9YjEajdf+5Bdz3/vPe1XO0vsZ/v74GPz93c3OTH0nX1lAV5MDX7hf3dyDhQe/GWoGKK8ICqQRrhWotSe7X1hejjdv70Wx+M5/PbxfoX7sbbd4tFtvjMTxwBuwuxKfA64ov1Whzcy2e+9H+/v7Ozg5i5vPrwGxs/N7v/d7W5jYOJK+7u5vZfGtz/fDw0Nv92t2jxwcffIypL27vFq9evf7yq1dXlxKunZ1dfP7lq1evvjo/PkHg3t7eZDKBHM2Xl1c8s9kMD6/noWQ8Hu/u7m5vT9fWNy9nNxsbW3diLq7W7+6mW+Nt6W8Xa9eLtYVaRPbaZLwjyf3t/dX11f3a4nZtAe1ibXR9v7he3N5tbWxsb413pl5vVeji9uLqUh3JaHN9bQOPE7x4+vTpJ598olzY9+jRI+SdnJzc3dyqbhWNV1sbm5Is7m4UvGsNN9C8trn12eefv3nz7vHTJ5gPXuDbt2/nN35v725vlO7Zs2fb29vYCJWSHh8fX15ewinw6OgIAFRo4I5PT8SO0HR7O51OX758SUYkef369fn5uWCESYiSJ0+efPTRR+uje7x6/uwZzMqm4POrmVLMr29QMiLUmypWOdcvZlef/vrXQb6xDrN8FZNHQrKB4Ob8wcGBGkfJ2dkZIk/Pz5D36PBI4P1ogW8XFxfo2ZtOhcjr+fPnf/eP/+if/bN/Bgl6ttfHiql1pLC372V+o9hIeq/ms1//+tc7e3vzm2v4L+dz1CqQGtzbmSg15EqBGPRfXsyfPnm+vTX5+utv8FKrOFhbezze+ZOf/fFHO0/355P92drhbGP3enT+2ZeH4+mL5880GA1P4uu1tb29g6P9o5vL+eX51exyProZjYjt7sb13ugX16/++S//9D9dv341Ors/mC427//+/+qP/6v/7f/m/PLsl7/6mz//83//4Ycffv75r9+9e7e7M8XhIpIMbCGMOKiF9bXNs4vzk8vTi5Rilta04aFxIn9td3eHd68SPjo8UBaSpnT/4//wP6j03WmYfLizD/NkMwK2vT3e3plGjDfWT05O3x6/29iUyeSO6tAu73Dy/mhvl3rQWiMb421VezW/+etffnp1Nb9b39jcmKxvjM7P3u1sjz/54Q/Icymwxc1dNIk2iO1fffkKto2NMQw3t3cbG2tPnzz50Y8+IQYp4HrUDhWhKr/55ht0Juv19e3pBIXqdHS3+PDZ0zXEbKT5X9/cgx+tb/2rf/WvtHRlRD8wMgbb3/pbv/eH/+Xf2Rxvnp4ef/nl1yT8v/zDv/vTn/708uL6s88++w//4T9i29u3rz/Xdo7fvXjx4vd+76cvnz8d3VxNtrcuFexu8fzZBwdHh3c03/39f//f/3OpFjfXsqBwfsh9/IOb2+sS4Cg23N7cmpLPV9+8Qe3d2ihlGd2Nx5ulOReH+wePnxyNFmnRt3d0yd1obVMb//Kr10T60dO0oPHGplK/ff0Gf37w8Yeb441n7MX9glaF//YmTXq8uaXuPvjgI0qPXt3YIr2jk7Oz12/f3N7Mz07erqWG0mZnVzez2fXVDEtu19c34MdYrVhbw975fDbB9Y27/YMpjUn5YAKuHr87JXV/9de/0AY3peGUXx5qUXQ/lYFnxL60/SB1RAMMfVRg/O0AtOtXsQ0gkAcCSlROCka18VOHRI1dCnwBry1/wgJCLMtElcWSKxDMaUAQ8SR6lCdDUrHro1DOuiW2qImnABFNuQoDnmclXk+NSdNo1X+K1o5Ml0eigJJ4lFZZAK8TskaLaWIrKwUMl/Isq7+xHq5SzB5qhl8Syo7Uvvnm9fHJueZ0cXF1cT5T1J/89m9fnJ8V/hBMrHFeUaSC36somfLjoZJU+I2MmFZgrOTm2oZiMVbrG+tb2KBksZt3C6bk7k40V9IFR1UiSSmEef8NLvlKAjkwDheQEe6WE9ulXuhlrCBDTyEHFjYNwMu6WiaREBg3eAacHf79ZwNAydNoU4crtKuQUCStV+orz3QW3ufFryAAMATb2Cy4+jX65a4EuwRHvwH/mwxJ+D0lD/MLv5YpUAh5xnUOTk6swHZDYMLfS1bIBtDIG/5hqg5HeZeun+AhaT+t2Bg6uw4cbYwoiPWtDWpd9+jufoMtTivaGp9cnF9/eUPdTA8PxuPp/PLq8ptvtta37q8X0FL9453ttc31642b2ShdSQQgUK+PNmpiBH6HYK/tGuDhU/jw2rSppXiq09n8r5pZClKXRRIebkjbeJSRRzgpxHBKeWNzzMPYdFTsS3rYqU3h6gWxF4z/bHZ+fnGrca/NdGwvzo8XO1NqUSo92mRU8kOByrFritYZiEcnIkUJuU+DjjYCFqtQPYzQWqIIcnMrLZ2odZGqrnR99QKq4CVvzUPIeAjb7HqG87prtMG/+3f/7t//+39PG8yurpUSgKzZm90yQhqduru9u4Yt3NtYZ890CBZ36d3+g3/wD/7Nv/k356cnjETKPJuxMU+ePoYk9BkQ0ACVIy6l7CGfoGjL6+k2lQJEFb6noOUyDighlx0cug76vgze6ekpYridnSn/waNDfXw2RlodshfPnsOsFFTcp7/+XP9SMS9nM6Rubqx99MFz9S/TGPu79ZubGAI49d/kFYbMZmmD1VXVV/nkBy9397Z1PfUdoV1oZOW8prqbUBQLbGHyTHmXFZYydwLRKXZVZziyantdE57ckJYHcIDubmPzmCrNW4PLYEhebR/ST4hIV5vGK10AqQQFF4Usi8WyreIfMshb6EzWNRIquVE1mC/eQCBQdHpVAy9DQ3LyD8K8i4+YAoIDHqzcWNuQokKSGuVqL4hjEGKuGFuUSFOmJLYkyCrEs13hDNleEYEAou5VgVN5lzP9WQJqTPDNN29PT87n85vx1nTvYFd5r28zYN2e0CsZKMyO5/qnqiqYo1nzDE9SjtF4ewu7jLmQQTUj+yZSeY23irK+mbbDrK3huMwVJXKZIqf44cL9xiajuH4XY4PFiCxPeAYsxg39cCOppRYB/CGmihxULd+FbmscyQ4t1UTjKZILPKi4hk/eK43WflGdqqAC5vU7TlQVdGmE+IE1zkQlch0DBPKgE09aRDEWqs6oMQtRHcXSMCOvBpf6yBtaUZqN9kaV4DxUNIInf+spTaXq8RI8wdidZvyqecOgSRNdwOiRI49AyDvT7xcnQEVYRwF7CNzEC2xsorj471W6Pmn4jJhOq34XanpjcbuhG7i4vl+b34/m92vj0fqjF8/0Wgnh9fr9Nq6MN2+u7mc381evX+9tTXYmu1sbYz2cG6q4DLzyoVhGCGjZwkxktGuCB/8y9wc/opSpqS1/ar4FwXC2OQkcTs4rJ7SLBnJwnZHwAbcofsw3gwCzCQMhRlc6wDyopYOFF1gIwBx1NF/cLW5n9A4GTMfpXkDSqKoZtWAmOXo0vCTMXMtIjQvpWqBoZKNcTYzkojwRX+GlKyrrxtxgSAVGPIDxCGRKeTi07ey1gOUVlXQCA4DgzY0lPE092d1hKiJga2usV5QisdUJ2dg4P7s4PbsgqI8ePTGvg4xf/fKvT2azH/zgB4+ePnnz7q3xaXRPdbUVqjkjIYTh+Rp1NL++jV1swb67SelAcqV9w/kuPhhawZMOvLmdI3Jvnb5aO377FvFPnzGOT6fbE0aUpTRgYpBMFzFX4/EEZzWl/b0dRUY5ToQha8bNo/OLTOSkHZV8AGNSDL4lf/zk4IOXT3Z2Js0xjK9OCdJGu7v7ChtutsOadnAV8Us7IRBAP0WF+pU+Ei6kXSP
"text/plain": [
"<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=570x380>"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Lets create a prompt.\n",
"\n",
"from io import BytesIO\n",
"import requests\n",
"from PIL import Image\n",
"\n",
"from sglang.srt.conversation import chat_templates\n",
"\n",
"image = Image.open(\n",
" BytesIO(\n",
" requests.get(\n",
" \"https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true\"\n",
" ).content\n",
" )\n",
")\n",
"\n",
"conv = chat_templates[chat_template].copy()\n",
"conv.append_message(conv.roles[0], f\"What's shown here: {conv.image_token}?\")\n",
"conv.append_message(conv.roles[1], \"\")\n",
"conv.image_data = [image]\n",
"\n",
"print(conv.get_prompt())\n",
"print(f\"Image size: {image.size}\")\n",
"\n",
"image"
]
},
{
"cell_type": "markdown",
2025-08-10 19:49:45 -07:00
"id": "13",
"metadata": {},
"source": [
2025-08-10 19:49:45 -07:00
"### Query via the offline Engine API"
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "14",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading safetensors checkpoint shards: 0% Completed | 0/50 [00:00<?, ?it/s]\n",
"Loading safetensors checkpoint shards: 2% Completed | 1/50 [00:22<18:10, 22.26s/it]\n",
"Loading safetensors checkpoint shards: 4% Completed | 2/50 [00:44<17:44, 22.17s/it]\n",
"Loading safetensors checkpoint shards: 6% Completed | 3/50 [01:06<17:24, 22.22s/it]\n",
"Loading safetensors checkpoint shards: 8% Completed | 4/50 [01:28<16:55, 22.07s/it]\n",
"Loading safetensors checkpoint shards: 10% Completed | 5/50 [01:50<16:28, 21.96s/it]\n",
"Loading safetensors checkpoint shards: 12% Completed | 6/50 [02:11<15:59, 21.80s/it]\n",
"Loading safetensors checkpoint shards: 14% Completed | 7/50 [02:34<15:52, 22.14s/it]\n",
"Loading safetensors checkpoint shards: 16% Completed | 8/50 [02:54<15:05, 21.57s/it]\n",
"Loading safetensors checkpoint shards: 18% Completed | 9/50 [03:17<14:51, 21.74s/it]\n",
"Loading safetensors checkpoint shards: 20% Completed | 10/50 [03:29<12:31, 18.79s/it]\n",
"Loading safetensors checkpoint shards: 22% Completed | 11/50 [03:32<09:10, 14.13s/it]\n",
"Loading safetensors checkpoint shards: 24% Completed | 12/50 [03:36<06:53, 10.89s/it]\n",
"Loading safetensors checkpoint shards: 26% Completed | 13/50 [03:39<05:19, 8.65s/it]\n",
"Loading safetensors checkpoint shards: 28% Completed | 14/50 [03:43<04:15, 7.09s/it]\n",
"Loading safetensors checkpoint shards: 30% Completed | 15/50 [03:46<03:29, 6.00s/it]\n",
"Loading safetensors checkpoint shards: 32% Completed | 16/50 [03:50<02:57, 5.23s/it]\n",
"Loading safetensors checkpoint shards: 34% Completed | 17/50 [03:53<02:35, 4.73s/it]\n",
"Loading safetensors checkpoint shards: 36% Completed | 18/50 [03:57<02:18, 4.33s/it]\n",
"Loading safetensors checkpoint shards: 38% Completed | 19/50 [04:00<02:06, 4.09s/it]\n",
"Loading safetensors checkpoint shards: 40% Completed | 20/50 [04:04<01:56, 3.87s/it]\n",
"Loading safetensors checkpoint shards: 42% Completed | 21/50 [04:07<01:48, 3.74s/it]\n",
"Loading safetensors checkpoint shards: 44% Completed | 22/50 [04:11<01:43, 3.71s/it]\n",
"Loading safetensors checkpoint shards: 46% Completed | 23/50 [04:14<01:37, 3.63s/it]\n",
"Loading safetensors checkpoint shards: 48% Completed | 24/50 [04:18<01:33, 3.60s/it]\n",
"Loading safetensors checkpoint shards: 50% Completed | 25/50 [04:21<01:26, 3.45s/it]\n",
"Loading safetensors checkpoint shards: 52% Completed | 26/50 [04:21<01:02, 2.61s/it]\n",
"Loading safetensors checkpoint shards: 54% Completed | 27/50 [04:25<01:06, 2.91s/it]\n",
"Loading safetensors checkpoint shards: 56% Completed | 28/50 [04:28<01:07, 3.09s/it]\n",
"Loading safetensors checkpoint shards: 58% Completed | 29/50 [04:32<01:07, 3.20s/it]\n",
"Loading safetensors checkpoint shards: 60% Completed | 30/50 [04:35<01:05, 3.25s/it]\n",
"Loading safetensors checkpoint shards: 62% Completed | 31/50 [04:39<01:02, 3.30s/it]\n",
"Loading safetensors checkpoint shards: 64% Completed | 32/50 [04:42<01:00, 3.37s/it]\n",
"Loading safetensors checkpoint shards: 66% Completed | 33/50 [04:46<00:58, 3.45s/it]\n",
"Loading safetensors checkpoint shards: 68% Completed | 34/50 [04:49<00:55, 3.45s/it]\n",
"Loading safetensors checkpoint shards: 70% Completed | 35/50 [04:53<00:51, 3.45s/it]\n",
"Loading safetensors checkpoint shards: 72% Completed | 36/50 [04:56<00:48, 3.46s/it]\n",
"Loading safetensors checkpoint shards: 74% Completed | 37/50 [05:00<00:44, 3.45s/it]\n",
"Loading safetensors checkpoint shards: 76% Completed | 38/50 [05:03<00:41, 3.45s/it]\n",
"Loading safetensors checkpoint shards: 78% Completed | 39/50 [05:07<00:38, 3.50s/it]\n",
"Loading safetensors checkpoint shards: 80% Completed | 40/50 [05:10<00:34, 3.49s/it]\n",
"Loading safetensors checkpoint shards: 82% Completed | 41/50 [05:14<00:31, 3.49s/it]\n",
"Loading safetensors checkpoint shards: 84% Completed | 42/50 [05:17<00:27, 3.47s/it]\n",
"Loading safetensors checkpoint shards: 86% Completed | 43/50 [05:20<00:24, 3.43s/it]\n",
"Loading safetensors checkpoint shards: 88% Completed | 44/50 [05:24<00:20, 3.46s/it]\n",
"Loading safetensors checkpoint shards: 90% Completed | 45/50 [05:27<00:17, 3.44s/it]\n",
"Loading safetensors checkpoint shards: 92% Completed | 46/50 [05:31<00:13, 3.44s/it]\n",
"Loading safetensors checkpoint shards: 94% Completed | 47/50 [05:34<00:10, 3.43s/it]\n",
"Loading safetensors checkpoint shards: 96% Completed | 48/50 [05:38<00:06, 3.43s/it]\n",
"Loading safetensors checkpoint shards: 98% Completed | 49/50 [05:41<00:03, 3.45s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Setting sliding_window_size to be attention_chunk_size: 8192Setting sliding_window_size to be attention_chunk_size: 8192\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading safetensors checkpoint shards: 100% Completed | 50/50 [05:44<00:00, 3.43s/it]\n",
"Loading safetensors checkpoint shards: 100% Completed | 50/50 [05:44<00:00, 6.90s/it]\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Setting sliding_window_size to be attention_chunk_size: 8192\n",
"Setting sliding_window_size to be attention_chunk_size: 8192\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Capturing batches (bs=1 avail_mem=21.53 GB): 100%|██████████| 35/35 [00:15<00:00, 2.25it/s] \n"
]
}
],
"source": [
"from sglang.test.test_utils import is_in_ci\n",
"\n",
"if not is_in_ci():\n",
" from sglang import Engine\n",
"\n",
" llm = Engine(\n",
" model_path=model_path,\n",
" trust_remote_code=True,\n",
" enable_multimodal=True,\n",
" mem_fraction_static=0.8,\n",
" tp_size=4,\n",
" attention_backend=\"fa3\",\n",
" context_length=65536,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "15",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The image depicts a man ironing clothing on the back of a yellow SUV in a city street, with another yellow taxi passing by. The man is wearing a yellow shirt and appears to be ironing a blue shirt on a makeshift ironing board set up behind the SUV. The scene suggests that the man may be a street vendor or someone who is trying to make a living by providing ironing services to people on the go.\n"
]
}
],
"source": [
"if not is_in_ci():\n",
" out = llm.generate(prompt=conv.get_prompt(), image_data=[image])\n",
" print(out[\"text\"])"
]
},
{
"cell_type": "markdown",
2025-08-10 19:49:45 -07:00
"id": "16",
"metadata": {},
"source": [
2025-08-10 19:49:45 -07:00
"### Query via the offline Engine API, but send precomputed embeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "17",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0eae2e36d07d42b89bc4b5ac7d62f226",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/50 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"if not is_in_ci():\n",
" # Compute the image embeddings using Huggingface.\n",
"\n",
" from transformers import AutoProcessor\n",
" from transformers import Llama4ForConditionalGeneration\n",
"\n",
" processor = AutoProcessor.from_pretrained(model_path, use_fast=True)\n",
" model = Llama4ForConditionalGeneration.from_pretrained(\n",
" model_path, torch_dtype=\"auto\"\n",
" ).eval()\n",
" vision = model.vision_model.cuda()\n",
" multi_modal_projector = model.multi_modal_projector.cuda()"
]
},
{
"cell_type": "code",
"execution_count": null,
2025-08-10 19:49:45 -07:00
"id": "18",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"processed_prompt[\"pixel_values\"].shape=torch.Size([5, 3, 336, 336])\n",
"The image depicts a man ironing on a makeshift ironing board set up on the back of a yellow SUV, in the middle of a busy street. The man is wearing a yellow shirt and appears to be ironing a blue shirt. In the background, there are other yellow taxis and tall buildings, suggesting that the scene is set in a city, likely New York City. The overall scene is one of a person going about their daily activities in a busy urban environment.\n"
]
}
],
"source": [
"if not is_in_ci():\n",
" processed_prompt = processor(\n",
" images=[image], text=conv.get_prompt(), return_tensors=\"pt\"\n",
" )\n",
" print(f'{processed_prompt[\"pixel_values\"].shape=}')\n",
" input_ids = processed_prompt[\"input_ids\"][0].detach().cpu().tolist()\n",
"\n",
" image_outputs = vision(\n",
" processed_prompt[\"pixel_values\"].to(\"cuda\"), output_hidden_states=False\n",
" )\n",
" image_features = image_outputs.last_hidden_state\n",
" vision_flat = image_features.view(-1, image_features.size(-1))\n",
" precomputed_embeddings = multi_modal_projector(vision_flat)\n",
"\n",
" mm_item = dict(modality=\"IMAGE\", precomputed_embeddings=precomputed_embeddings)\n",
" out = llm.generate(input_ids=input_ids, image_data=[mm_item])\n",
" print(out[\"text\"])"
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all",
"custom_cell_magics": "kql",
"encoding": "# -*- coding: utf-8 -*-"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}