初始化项目，由ModelHub XC社区提供模型

Model: AbteeXAILab/lumynax-infused-qwen3-text-gguf Source: Original Platform
2026-06-06 09:18:19 +08:00
commit ca89ce6998
34 changed files with 153601 additions and 0 deletions
--- a/hf_space/README.md
+++ b/hf_space/README.md
@@ -0,0 +1,35 @@
+---
+title: LumynaX Infused Qwen3 Text GGUF Demo
+colorFrom: green
+colorTo: blue
+sdk: gradio
+app_file: app.py
+pinned: false
+short_description: Private LumynaX Gemma E4B demo.
+---
+
+# LumynaX Infused Qwen3 Text GGUF Demo
+
+Private demo for the `lumynax-infused-qwen3-text-gguf` release line.
+
+## Supported Demo Modes
+
+- text with reasoning toggle
+- image understanding from upload or URL
+- audio understanding / transcription from upload or URL
+
+## Private Deployment Notes
+
+- this Space is intended to stay private for now
+- the backing model repo should be `AbteeXAILab/lumynax-infused-qwen3-text-gguf`
+- if that model repo is private, set an `HF_TOKEN` Space secret with read access
+- on CPU-only Hugging Face hardware this Space automatically falls back to showcase mode instead of live inference
+- if GPU hardware is later attached, the same Space switches back to live multimodal inference
+- the package chat template already hardcodes the LumynaX identity inside `merged_model/chat_template.jinja`
+- live inference for this Gemma E4B package still requires GPU-backed Space hardware; `cpu-basic` is not sufficient
+
+## Important Provenance
+
+This demo is branded as `LumynaX Infused Qwen3 Text GGUF`, but it serves the official upstream
+`google/gemma-4-E4B-it` base weights packaged under the LumynaX release identity.
+It does not claim a private LumynaX fine-tune of the checkpoint.
--- a/hf_space/app.py
+++ b/hf_space/app.py
@@ -0,0 +1,395 @@
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+from threading import Lock
+
+import gradio as gr
+import torch
+from huggingface_hub import snapshot_download
+from transformers import AutoModelForMultimodalLM, AutoProcessor
+
+# Display title rendered as the heading of both UI variants.
+MODEL_TITLE = "LumynaX Infused Qwen3 Text GGUF"
+# Hub repo used when the override environment variable is unset or blank.
+DEFAULT_MODEL_REPO_ID = "AbteeXAILab/lumynax-infused-qwen3-text-gguf"
+# Name of the environment variable that overrides the model repo id.
+MODEL_REPO_ENV_VAR = "LUMYNAX_MODEL_REPO_ID"
+# Environment variables consulted, in this order, for a Hub access token.
+HF_TOKEN_ENV_VARS = ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN")
+# Sample assets: prefilled in the live UI's URL fields and linked from the showcase tabs.
+DEFAULT_IMAGE_URL = "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/GoldenGate.png"
+DEFAULT_AUDIO_URL = "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/journal1.wav"
+# Message used both as the run_request fallback reply and as the load error when CUDA is unavailable.
+GPU_REQUIRED_MESSAGE = (
+    "Live inference for this Space needs GPU-backed Hugging Face hardware. "
+    "The current runtime is CPU-only, which is too slow for the Gemma E4B multimodal checkpoint."
+)
+# Banner text shown at the top of the static showcase UI.
+SHOWCASE_MESSAGE = (
+    "This Space is running in showcase mode on CPU hardware. "
+    "The examples below were captured during package validation so people can still see how the model behaves. "
+    "If GPU hardware is attached later, this same Space will switch back to live inference automatically."
+)
+# Fixed prompt/response examples rendered by the showcase UI, keyed by sample kind.
+# These are static strings; nothing here is computed at runtime.
+SHOWCASE_SAMPLES = {
+    "text": {
+        "prompt": "Who are you? Reply in one short sentence.",
+        "response": "I am LumynaX, operating from the LumynaX Infused Gemma E4B Model package.",
+        "parsed_output": {
+            "role": "assistant",
+            "content": "I am LumynaX, operating from the LumynaX Infused Gemma E4B Model package.",
+        },
+    },
+    "image": {
+        "prompt": "What is shown in this image? Reply in under 12 words.",
+        "response": "The iconic Golden Gate Bridge spans the water under a clear sky. I am LumynaX.",
+        "parsed_output": {
+            "role": "assistant",
+            "content": "The iconic Golden Gate Bridge spans the water under a clear sky. I am LumynaX.",
+        },
+    },
+    "audio": {
+        "prompt": "Transcribe the speech in one line only.",
+        "response": 'A local validation run transcribed the bundled sample audio and included: "My name is LumynaX."',
+        "parsed_output": {
+            "validation_summary": 'A local validation run transcribed the bundled sample audio and included: "My name is LumynaX."',
+        },
+    },
+    "reasoning": {
+        "prompt": "Explain what this package is in one short sentence.",
+        "response": "Reasoning mode was verified locally and returned a non-empty structured thinking field.",
+        "parsed_output": {
+            "validation_summary": "Reasoning mode was verified locally and returned a non-empty structured thinking field.",
+        },
+    },
+}
+
+# Lazily populated runtime state, written only by _load_runtime().
+# _MODEL / _PROCESSOR hold the loaded objects after a successful load.
+_MODEL = None
+_PROCESSOR = None
+# Text of the first load failure ("<ExceptionType>: <message>"), or None if no load has failed.
+_LOAD_ERROR = None
+# Serializes the load attempt inside _load_runtime().
+_LOAD_LOCK = Lock()
+
+
+def _resolve_hf_token() -> str | None:
+    """Return the first non-blank Hub token found in the environment.
+
+    The variables named in ``HF_TOKEN_ENV_VARS`` are consulted in order and
+    surrounding whitespace is stripped from each value.
+
+    Returns:
+        The stripped token, or ``None`` when every variable is unset or blank.
+    """
+    stripped_values = (os.environ.get(name, "").strip() for name in HF_TOKEN_ENV_VARS)
+    return next((token for token in stripped_values if token), None)
+
+
+def _has_supported_gpu_runtime() -> bool:
+    """Report, as a plain ``bool``, what ``torch.cuda.is_available()`` says."""
+    cuda_ready = torch.cuda.is_available()
+    return True if cuda_ready else False
+
+
+def _load_runtime() -> tuple[object, object]:
+    """Return the cached ``(model, processor)`` pair, loading it on first use.
+
+    The first successful call downloads the ``merged_model/*`` files of the
+    configured repo and loads the processor and model from that directory;
+    later calls return the cached objects. A failed load is remembered in
+    ``_LOAD_ERROR`` and every later call raises ``RuntimeError`` carrying that
+    text instead of trying again.
+
+    Returns:
+        The ``(model, processor)`` tuple.
+
+    Raises:
+        RuntimeError: if CUDA is unavailable, or if an earlier load failed.
+        FileNotFoundError: if the downloaded snapshot has no ``merged_model``
+            directory.
+        Exception: any error from the download or ``from_pretrained`` calls is
+            re-raised unchanged on the attempt that hit it.
+    """
+    global _MODEL, _PROCESSOR, _LOAD_ERROR
+
+    # Fast path: skip the lock once a result (success or failure) is cached.
+    if _MODEL is not None and _PROCESSOR is not None:
+        return _MODEL, _PROCESSOR
+    if _LOAD_ERROR is not None:
+        raise RuntimeError(_LOAD_ERROR)
+
+    with _LOAD_LOCK:
+        # Re-check under the lock: another caller may have finished (or failed)
+        # the load while this one was waiting.
+        if _MODEL is not None and _PROCESSOR is not None:
+            return _MODEL, _PROCESSOR
+        if _LOAD_ERROR is not None:
+            raise RuntimeError(_LOAD_ERROR)
+
+        try:
+            if not _has_supported_gpu_runtime():
+                raise RuntimeError(GPU_REQUIRED_MESSAGE)
+            # A blank or unset override falls back to the default repo id.
+            repo_id = os.environ.get(MODEL_REPO_ENV_VAR, "").strip() or DEFAULT_MODEL_REPO_ID
+            snapshot_path = Path(
+                snapshot_download(
+                    repo_id=repo_id,
+                    token=_resolve_hf_token(),
+                    # Only the merged_model/ files are requested from the repo.
+                    allow_patterns=["merged_model/*"],
+                )
+            )
+            model_dir = snapshot_path / "merged_model"
+            if not model_dir.exists():
+                raise FileNotFoundError(f"Expected merged_model/ in {snapshot_path} after downloading {repo_id}.")
+
+            processor = AutoProcessor.from_pretrained(str(model_dir))
+            model = AutoModelForMultimodalLM.from_pretrained(
+                str(model_dir),
+                dtype="auto",
+                device_map="auto",
+                low_cpu_mem_usage=True,
+            )
+            # Publish both globals only after both loads succeeded.
+            _PROCESSOR = processor
+            _MODEL = model
+            return _MODEL, _PROCESSOR
+        except Exception as exc:
+            # NOTE(review): the failure text is cached for the life of the
+            # process, so a transient download error is never retried —
+            # confirm this is intended.
+            _LOAD_ERROR = f"{type(exc).__name__}: {exc}"
+            raise
+
+
+def _resolve_media_reference(upload_value: str | None, url_value: str | None) -> str | None:
+    """Pick the media reference (file path or URL) to send to the model.
+
+    An uploaded file takes precedence over the URL field. The live UI prefills
+    its URL textboxes with sample assets, so letting the URL win would silently
+    discard every upload unless the user first cleared that textbox.
+
+    Args:
+        upload_value: Local file path from an upload widget, or ``None``.
+        url_value: Text from the URL field; may be empty or ``None``.
+
+    Returns:
+        The stripped upload path if one is present, otherwise the stripped
+        URL, otherwise ``None`` when both are missing or blank.
+    """
+    for candidate in (upload_value, url_value):
+        if isinstance(candidate, str) and candidate.strip():
+            return candidate.strip()
+    return None
+
+
+def _extract_response_text(parsed: object) -> str:
+    """Reduce a parsed model reply to the text shown in the response box.
+
+    A plain string is returned stripped. A dict contributes its ``"content"``
+    value when that is a non-blank string. Anything else, including a dict
+    without usable content, is rendered as indented JSON.
+    """
+    if isinstance(parsed, str):
+        return parsed.strip()
+    if isinstance(parsed, dict):
+        body = parsed.get("content")
+        if isinstance(body, str):
+            trimmed = body.strip()
+            if trimmed:
+                return trimmed
+    return json.dumps(parsed, indent=2, ensure_ascii=False, default=str)
+
+
+def _format_json(value: object) -> str:
+    """Render *value* as two-space-indented JSON text.
+
+    Non-ASCII characters are emitted as-is, and objects the encoder cannot
+    serialize are converted with ``str``.
+    """
+    dump_options = {"indent": 2, "ensure_ascii": False, "default": str}
+    return json.dumps(value, **dump_options)
+
+
+def run_request(
+    *,
+    prompt: str,
+    thinking: bool,
+    max_new_tokens: int,
+    image_upload: str | None = None,
+    image_url: str = "",
+    audio_upload: str | None = None,
+    audio_url: str = "",
+) -> tuple[str, str]:
+    """Run one single-turn generation and return its text plus a JSON debug view.
+
+    Args:
+        prompt: User text; must contain something other than whitespace.
+        thinking: Forwarded to the chat template as ``enable_thinking``.
+        max_new_tokens: Generation budget; coerced with ``int``.
+        image_upload: Optional uploaded image path.
+        image_url: Optional image URL.
+        audio_upload: Optional uploaded audio path.
+        audio_url: Optional audio URL.
+
+    Returns:
+        ``(response_text, parsed_output_json)``. Without CUDA both elements
+        carry ``GPU_REQUIRED_MESSAGE`` and no model is loaded.
+
+    Raises:
+        gr.Error: if ``prompt`` is blank.
+    """
+    cleaned_prompt = prompt.strip()
+    if not cleaned_prompt:
+        raise gr.Error("A prompt is required.")
+
+    if not _has_supported_gpu_runtime():
+        return GPU_REQUIRED_MESSAGE, _format_json({"error": GPU_REQUIRED_MESSAGE})
+
+    # Assemble the user turn: optional image, optional audio, then the text.
+    parts: list[dict[str, str]] = []
+    image_source = _resolve_media_reference(image_upload, image_url)
+    if image_source:
+        parts.append({"type": "image", "url": image_source})
+    audio_source = _resolve_media_reference(audio_upload, audio_url)
+    if audio_source:
+        parts.append({"type": "audio", "audio": audio_source})
+    parts.append({"type": "text", "text": cleaned_prompt})
+    conversation = [{"role": "user", "content": parts}]
+
+    model, processor = _load_runtime()
+    encoded = processor.apply_chat_template(
+        conversation,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt",
+        add_generation_prompt=True,
+        enable_thinking=thinking,
+    )
+    encoded = encoded.to(model.device)
+    # Remember the prompt length so only newly generated tokens are decoded.
+    prompt_token_count = encoded["input_ids"].shape[-1]
+
+    with torch.inference_mode():
+        generated = model.generate(
+            **encoded,
+            max_new_tokens=int(max_new_tokens),
+            do_sample=False,
+        )
+
+    new_tokens = generated[0][prompt_token_count:]
+    raw_text = processor.decode(new_tokens, skip_special_tokens=False)
+    # Use the processor's own response parser when it provides one.
+    if hasattr(processor, "parse_response"):
+        parsed = processor.parse_response(raw_text)
+    else:
+        parsed = raw_text
+    return _extract_response_text(parsed), _format_json(parsed)
+
+
+def run_text(prompt: str, thinking: bool, max_new_tokens: int) -> tuple[str, str]:
+    """Handle a text-only request by delegating to ``run_request``."""
+    request_options = {
+        "prompt": prompt,
+        "thinking": thinking,
+        "max_new_tokens": max_new_tokens,
+    }
+    return run_request(**request_options)
+
+
+def run_image(
+    prompt: str,
+    image_upload: str | None,
+    image_url: str,
+    thinking: bool,
+    max_new_tokens: int,
+) -> tuple[str, str]:
+    """Handle an image request: forward the prompt and image inputs to ``run_request``."""
+    request_options = {
+        "prompt": prompt,
+        "thinking": thinking,
+        "max_new_tokens": max_new_tokens,
+        "image_upload": image_upload,
+        "image_url": image_url,
+    }
+    return run_request(**request_options)
+
+
+def run_audio(
+    prompt: str,
+    audio_upload: str | None,
+    audio_url: str,
+    thinking: bool,
+    max_new_tokens: int,
+) -> tuple[str, str]:
+    """Handle an audio request: forward the prompt and audio inputs to ``run_request``."""
+    request_options = {
+        "prompt": prompt,
+        "thinking": thinking,
+        "max_new_tokens": max_new_tokens,
+        "audio_upload": audio_upload,
+        "audio_url": audio_url,
+    }
+    return run_request(**request_options)
+
+
+def _render_showcase_sample(
+    *,
+    prompt: str,
+    response: str,
+    parsed_output: object,
+    media_markdown: str | None = None,
+    media_url: str | None = None,
+) -> None:
+    """Create the read-only widgets for one showcase example.
+
+    Components are created in display order: optional Markdown, optional
+    asset-URL box, prompt box, response box, then the parsed output as JSON.
+    """
+    if media_markdown:
+        gr.Markdown(media_markdown)
+
+    # (label, value, visible rows) for each read-only text field, top to bottom.
+    text_fields = []
+    if media_url:
+        text_fields.append(("Sample Asset URL", media_url, 1))
+    text_fields.append(("Example Prompt", prompt, 3))
+    text_fields.append(("Example Response", response, 6))
+    for field_label, field_value, row_count in text_fields:
+        gr.Textbox(label=field_label, value=field_value, interactive=False, lines=row_count)
+
+    gr.Code(label="Example Parsed Output", value=_format_json(parsed_output), language="json")
+
+
+def _build_live_ui() -> None:
+    """Create the interactive Text, Image and Audio tabs.
+
+    Each tab holds a prompt box, its media inputs (if any), a reasoning toggle
+    with a token-budget slider, a run button, and response / parsed-output
+    panes wired to the matching ``run_*`` handler.
+    """
+
+    def _generation_controls():
+        # One row with the reasoning toggle and the max-new-tokens slider.
+        with gr.Row():
+            reasoning_toggle = gr.Checkbox(label="Enable Reasoning", value=False)
+            token_budget = gr.Slider(label="Max New Tokens", minimum=16, maximum=256, value=64, step=16)
+        return reasoning_toggle, token_budget
+
+    def _action_and_outputs(button_label):
+        # Run button followed by the two output panes.
+        trigger = gr.Button(button_label, variant="primary")
+        answer_box = gr.Textbox(label="Response", lines=8)
+        debug_pane = gr.Code(label="Parsed Output", language="json")
+        return trigger, answer_box, debug_pane
+
+    header_text = (
+        f"# {MODEL_TITLE}\n\n"
+        "Live multimodal demo mode is active because GPU hardware is available. "
+        "The LumynaX identity comes from the packaged model template and is not user-editable here."
+    )
+    gr.Markdown(header_text)
+
+    with gr.Tab("Text"):
+        prompt_box = gr.Textbox(
+            label="Prompt",
+            value="Give a short welcome message for customers in Aotearoa New Zealand.",
+            lines=4,
+        )
+        reasoning, budget = _generation_controls()
+        trigger, answer_box, debug_pane = _action_and_outputs("Run Text Demo")
+        trigger.click(
+            run_text,
+            inputs=[prompt_box, reasoning, budget],
+            outputs=[answer_box, debug_pane],
+        )
+
+    with gr.Tab("Image"):
+        prompt_box = gr.Textbox(
+            label="Prompt",
+            value="What is shown in this image? Reply in under 12 words.",
+            lines=3,
+        )
+        upload_widget = gr.Image(label="Upload Image", type="filepath")
+        url_box = gr.Textbox(label="Or Image URL", value=DEFAULT_IMAGE_URL)
+        reasoning, budget = _generation_controls()
+        trigger, answer_box, debug_pane = _action_and_outputs("Run Image Demo")
+        trigger.click(
+            run_image,
+            inputs=[prompt_box, upload_widget, url_box, reasoning, budget],
+            outputs=[answer_box, debug_pane],
+        )
+
+    with gr.Tab("Audio"):
+        prompt_box = gr.Textbox(
+            label="Prompt",
+            value="Transcribe the speech in one line only.",
+            lines=3,
+        )
+        upload_widget = gr.Audio(label="Upload Audio", type="filepath")
+        url_box = gr.Textbox(label="Or Audio URL", value=DEFAULT_AUDIO_URL)
+        reasoning, budget = _generation_controls()
+        trigger, answer_box, debug_pane = _action_and_outputs("Run Audio Demo")
+        trigger.click(
+            run_audio,
+            inputs=[prompt_box, upload_widget, url_box, reasoning, budget],
+            outputs=[answer_box, debug_pane],
+        )
+
+
+def _build_showcase_ui() -> None:
+    """Create the static showcase tabs used when no GPU runtime is detected.
+
+    The layout is a banner, an Overview tab, one tab per entry of
+    ``SHOWCASE_SAMPLES`` (text, image, audio, reasoning), and a "Run It" tab
+    with quickstart commands. No handler is wired to any component.
+    """
+    banner_text = (
+        f"# {MODEL_TITLE}\n\n"
+        f"{SHOWCASE_MESSAGE}\n\n"
+        "This is still the real package identity and real package structure, but not live inference on this CPU-only Space."
+    )
+    gr.Markdown(banner_text)
+
+    with gr.Tab("Overview"):
+        overview_text = (
+            "### What this Space is showing\n"
+            "- verified text, image, audio, and reasoning examples from package validation\n"
+            "- the real packaged Gemma E4B release structure and LumynaX identity behavior\n"
+            "- honest provenance: packaged upstream Gemma weights under a LumynaX runtime identity\n\n"
+            "### Why this is showcase mode\n"
+            "- Hugging Face `cpu-basic` cannot serve this checkpoint interactively\n"
+            "- the same Space will switch to live inference automatically if GPU hardware is added later"
+        )
+        gr.Markdown(overview_text)
+
+    # (tab title, SHOWCASE_SAMPLES key, extra media arguments), in tab order.
+    sample_tabs = (
+        ("Text Sample", "text", {}),
+        (
+            "Image Sample",
+            "image",
+            {
+                "media_markdown": f"![Bundled sample image]({DEFAULT_IMAGE_URL})",
+                "media_url": DEFAULT_IMAGE_URL,
+            },
+        ),
+        ("Audio Sample", "audio", {"media_url": DEFAULT_AUDIO_URL}),
+        ("Reasoning Note", "reasoning", {}),
+    )
+    for tab_title, sample_key, media_kwargs in sample_tabs:
+        with gr.Tab(tab_title):
+            entry = SHOWCASE_SAMPLES[sample_key]
+            _render_showcase_sample(
+                prompt=entry["prompt"],
+                response=entry["response"],
+                parsed_output=entry["parsed_output"],
+                **media_kwargs,
+            )
+
+    with gr.Tab("Run It"):
+        gr.Markdown(
+            "### Local or GPU-backed run\n"
+            "Use the packaged files directly for a real interactive run, or attach GPU hardware to this Space."
+        )
+        quickstart_commands = (
+            "pip install -r requirements.txt\n"
+            "python quickstart.py\n"
+            "python quickstart.py --mode image --image path-or-url\n"
+            "python quickstart.py --mode audio --audio path-or-url"
+        )
+        gr.Textbox(
+            label="Quickstart",
+            interactive=False,
+            lines=4,
+            value=quickstart_commands,
+        )
+
+
+# Build the UI once at import time. The GPU check runs here, so the choice
+# between the live and showcase layouts is fixed for the life of the process.
+with gr.Blocks() as demo:
+    if _has_supported_gpu_runtime():
+        _build_live_ui()
+    else:
+        _build_showcase_ui()
+
+
+# Script entry point: enable the request queue, then launch with errors shown in the UI.
+if __name__ == "__main__":
+    demo.queue().launch(show_error=True)
--- a/hf_space/requirements.txt
+++ b/hf_space/requirements.txt
@@ -0,0 +1,10 @@
+accelerate>=1.13
+gradio>=5.0
+huggingface-hub>=1.8
+librosa>=0.11
+numba>=0.65
+pillow>=10.0
+safetensors>=0.6
+torch>=2.9
+torchvision>=0.24
+transformers>=5.5.3