初始化项目，由ModelHub XC社区提供模型

Model: Hcompany/Holo1-7B Source: Original Platform
2026-05-21 20:42:13 +08:00
commit ffb26d9020
24 changed files with 161418 additions and 0 deletions
--- a/localization.py
+++ b/localization.py
@@ -0,0 +1,52 @@
+import json
+from typing import Any, Literal
+
+from pydantic import BaseModel
+
+
+def get_localization_prompt(image, instruction: str) -> list[dict[str, Any]]:
+    guidelines: str = "Localize an element on the GUI image according to my instructions and output a click position as Click(x, y) with x num pixels from the left edge and y num pixels from the top edge."
+
+    return [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "image": image,
+                },
+                {"type": "text", "text": f"{guidelines}\n{instruction}"},
+            ],
+        }
+    ]
+
+
+class ClickAction(BaseModel):
+    """Click at specific coordinates on the screen."""
+
+    # NOTE(review): the JSON schema of this model is serialized into the system
+    # message built by get_localization_prompt_structured_output. Presumably the
+    # class docstring, field names and field order all feed into that schema,
+    # so changing any of them may change the prompt -- confirm against
+    # pydantic's schema generation before editing.
+
+    # Fixed tag: the only permitted value, and the default, is "click".
+    action: Literal["click"] = "click"
+    x: int
+    """The x coordinate, number of pixels from the left edge."""
+    y: int
+    """The y coordinate, number of pixels from the top edge."""
+
+
+def get_localization_prompt_structured_output(image, instruction: str) -> list[dict[str, Any]]:
+    guidelines: str = "Localize an element on the GUI image according to my instructions and output a click position. You must output a valid JSON format."
+
+    return [
+        {
+            "role": "system",
+            "content": json.dumps([ClickAction.model_json_schema()]),
+        },
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "image": image,
+                },
+                {"type": "text", "text": f"{guidelines}\n{instruction}"},
+            ],
+        },
+    ]