lumynax-infused-qwen3-text-…/hf_space/app.py

from __future__ import annotations

import json
import os
from pathlib import Path
from threading import Lock

import gradio as gr
import torch
from huggingface_hub import snapshot_download
from transformers import AutoModelForMultimodalLM, AutoProcessor

# Heading text rendered at the top of both the live and the showcase UI.
# NOTE(review): the title and default repo id say "Qwen3 Text GGUF", while the
# messages and samples below describe a "Gemma E4B" multimodal checkpoint —
# confirm which naming is intended.
MODEL_TITLE = "LumynaX Infused Qwen3 Text GGUF"
# Hub repository used when MODEL_REPO_ENV_VAR is unset or blank.
DEFAULT_MODEL_REPO_ID = "AbteeXAILab/lumynax-infused-qwen3-text-gguf"
# Name of the environment variable that overrides DEFAULT_MODEL_REPO_ID.
MODEL_REPO_ENV_VAR = "LUMYNAX_MODEL_REPO_ID"
# Environment variables consulted, in this order, for a Hub access token.
HF_TOKEN_ENV_VARS = ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN")
# Sample assets: pre-filled into the live image/audio URL fields and shown in
# the showcase tabs.
DEFAULT_IMAGE_URL = "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/GoldenGate.png"
DEFAULT_AUDIO_URL = "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/journal1.wav"
# Raised by the loader and returned by run_request when no CUDA device is visible.
GPU_REQUIRED_MESSAGE = (
    "Live inference for this Space needs GPU-backed Hugging Face hardware. "
    "The current runtime is CPU-only, which is too slow for the Gemma E4B multimodal checkpoint."
)
# Banner text displayed at the top of the showcase (CPU-only) UI.
SHOWCASE_MESSAGE = (
    "This Space is running in showcase mode on CPU hardware. "
    "The examples below were captured during package validation so people can still see how the model behaves. "
    "If GPU hardware is attached later, this same Space will switch back to live inference automatically."
)
# Static examples rendered by the showcase UI, keyed by sample kind. Each entry
# carries the prompt, the response text, and the structured output shown in the
# "Example Parsed Output" code box.
SHOWCASE_SAMPLES = {
    "text": {
        "prompt": "Who are you? Reply in one short sentence.",
        "response": "I am LumynaX, operating from the LumynaX Infused Gemma E4B Model package.",
        "parsed_output": {
            "role": "assistant",
            "content": "I am LumynaX, operating from the LumynaX Infused Gemma E4B Model package.",
        },
    },
    "image": {
        "prompt": "What is shown in this image? Reply in under 12 words.",
        "response": "The iconic Golden Gate Bridge spans the water under a clear sky. I am LumynaX.",
        "parsed_output": {
            "role": "assistant",
            "content": "The iconic Golden Gate Bridge spans the water under a clear sky. I am LumynaX.",
        },
    },
    # The audio and reasoning entries hold a validation summary rather than a
    # role/content message.
    "audio": {
        "prompt": "Transcribe the speech in one line only.",
        "response": 'A local validation run transcribed the bundled sample audio and included: "My name is LumynaX."',
        "parsed_output": {
            "validation_summary": 'A local validation run transcribed the bundled sample audio and included: "My name is LumynaX."',
        },
    },
    "reasoning": {
        "prompt": "Explain what this package is in one short sentence.",
        "response": "Reasoning mode was verified locally and returned a non-empty structured thinking field.",
        "parsed_output": {
            "validation_summary": "Reasoning mode was verified locally and returned a non-empty structured thinking field.",
        },
    },
}

# Process-wide cache filled by _load_runtime(): the loaded model and processor,
# or — if loading failed — a "<ExceptionType>: <message>" string describing the
# failure. _LOAD_LOCK is held while a load attempt is in progress.
_MODEL = None
_PROCESSOR = None
_LOAD_ERROR = None
_LOAD_LOCK = Lock()


def _resolve_hf_token() -> str | None:
    """Return the first non-blank Hub token found in the environment.

    The variables named in ``HF_TOKEN_ENV_VARS`` are consulted in order; each
    value is whitespace-stripped before it is considered. ``None`` is returned
    when none of them holds a non-empty value.
    """
    stripped_values = (os.environ.get(name, "").strip() for name in HF_TOKEN_ENV_VARS)
    return next((token for token in stripped_values if token), None)


def _has_supported_gpu_runtime() -> bool:
    """Report whether PyTorch sees a CUDA device in this process."""
    cuda_ready = torch.cuda.is_available()
    return bool(cuda_ready)


def _load_runtime() -> tuple[object, object]:
    """Return the cached ``(model, processor)`` pair, loading it on first use.

    The cached pair (or the cached failure) is checked once without the lock
    and once more after acquiring ``_LOAD_LOCK``, so only one caller performs
    the download and load. The ``merged_model/`` folder of the configured
    repository is downloaded and both objects are loaded from it.

    Raises:
        RuntimeError: when no CUDA device is available, or when an earlier
            load attempt failed (the recorded failure text is re-raised).
        FileNotFoundError: when the snapshot has no ``merged_model/`` folder.
        Exception: any other error from the download or load is recorded in
            ``_LOAD_ERROR`` and re-raised unchanged.
    """
    global _MODEL, _PROCESSOR, _LOAD_ERROR

    # Check without taking the lock: a finished load, or a recorded failure.
    if not (_MODEL is None or _PROCESSOR is None):
        return _MODEL, _PROCESSOR
    if _LOAD_ERROR is not None:
        raise RuntimeError(_LOAD_ERROR)

    with _LOAD_LOCK:
        # Another caller may have finished (or failed) while we were waiting.
        if not (_MODEL is None or _PROCESSOR is None):
            return _MODEL, _PROCESSOR
        if _LOAD_ERROR is not None:
            raise RuntimeError(_LOAD_ERROR)

        try:
            if not _has_supported_gpu_runtime():
                raise RuntimeError(GPU_REQUIRED_MESSAGE)

            configured_repo = os.environ.get(MODEL_REPO_ENV_VAR, "").strip()
            repo_id = configured_repo if configured_repo else DEFAULT_MODEL_REPO_ID

            downloaded_to = snapshot_download(
                repo_id=repo_id,
                token=_resolve_hf_token(),
                allow_patterns=["merged_model/*"],
            )
            snapshot_path = Path(downloaded_to)
            model_dir = snapshot_path / "merged_model"
            if not model_dir.exists():
                raise FileNotFoundError(f"Expected merged_model/ in {snapshot_path} after downloading {repo_id}.")

            model_location = str(model_dir)
            loaded_processor = AutoProcessor.from_pretrained(model_location)
            loaded_model = AutoModelForMultimodalLM.from_pretrained(
                model_location,
                dtype="auto",
                device_map="auto",
                low_cpu_mem_usage=True,
            )
        except Exception as load_failure:
            # Remember the failure so later callers fail fast with the same text.
            _LOAD_ERROR = f"{type(load_failure).__name__}: {load_failure}"
            raise

        # Publish the processor first, then the model, as the cache contents.
        _PROCESSOR = loaded_processor
        _MODEL = loaded_model
        return _MODEL, _PROCESSOR


def _resolve_media_reference(upload_value: str | None, url_value: str | None) -> str | None:
    """Pick the media reference to send to the model.

    An uploaded file path takes precedence over the URL field. The live UI
    pre-fills the URL fields with sample assets, so letting the URL win would
    silently ignore a file the user explicitly uploaded unless they also
    cleared the URL box.

    Args:
        upload_value: Local file path supplied by the upload widget, or ``None``.
        url_value: Text typed (or pre-filled) in the URL field, or ``None``.

    Returns:
        The whitespace-stripped upload path if one is present, otherwise the
        stripped URL, otherwise ``None``. Non-string and blank values are
        treated as absent.
    """
    for candidate in (upload_value, url_value):
        if isinstance(candidate, str):
            cleaned = candidate.strip()
            if cleaned:
                return cleaned
    return None


def _extract_response_text(parsed: object) -> str:
    """Reduce a parsed model response to the text shown in the Response box.

    A plain string is returned stripped. A dict with a non-blank string under
    ``"content"`` yields that string, stripped. Anything else is rendered as
    indented JSON, with non-serializable values converted via ``str``.
    """
    if isinstance(parsed, str):
        return parsed.strip()

    if isinstance(parsed, dict):
        body = parsed.get("content")
        if isinstance(body, str):
            trimmed = body.strip()
            if trimmed:
                return trimmed

    return json.dumps(parsed, default=str, ensure_ascii=False, indent=2)


def _format_json(value: object) -> str:
    """Serialize ``value`` as two-space-indented JSON for display.

    Non-ASCII characters are kept as-is, and objects the JSON encoder cannot
    handle are converted with ``str``.
    """
    rendered = json.dumps(value, default=str, ensure_ascii=False, indent=2)
    return rendered


def run_request(
    *,
    prompt: str,
    thinking: bool,
    max_new_tokens: int,
    image_upload: str | None = None,
    image_url: str = "",
    audio_upload: str | None = None,
    audio_url: str = "",
) -> tuple[str, str]:
    """Run one generation request and return ``(response_text, parsed_json)``.

    The user message is assembled from an optional image part, an optional
    audio part and the stripped prompt, in that order. It is passed through
    the processor's chat template, and the model generates with sampling
    disabled. Only the tokens produced after the prompt are decoded.

    Args:
        prompt: Instruction text; must contain non-whitespace characters.
        thinking: Forwarded to the chat template as ``enable_thinking``.
        max_new_tokens: Generation budget, coerced with ``int``.
        image_upload: Local image path, if any.
        image_url: Image URL, if any.
        audio_upload: Local audio path, if any.
        audio_url: Audio URL, if any.

    Returns:
        The display text and the JSON rendering of the parsed response. When
        no CUDA device is available, the GPU-required message is returned in
        both positions instead of running the model.

    Raises:
        gr.Error: when the prompt is blank.
    """
    cleaned_prompt = prompt.strip()
    if not cleaned_prompt:
        raise gr.Error("A prompt is required.")

    if not _has_supported_gpu_runtime():
        return GPU_REQUIRED_MESSAGE, _format_json({"error": GPU_REQUIRED_MESSAGE})

    # (part type, key that carries the reference, resolved reference or None)
    media_parts = (
        ("image", "url", _resolve_media_reference(image_upload, image_url)),
        ("audio", "audio", _resolve_media_reference(audio_upload, audio_url)),
    )
    content: list[dict[str, str]] = [
        {"type": part_type, reference_key: reference}
        for part_type, reference_key, reference in media_parts
        if reference
    ]
    content.append({"type": "text", "text": cleaned_prompt})
    messages = [{"role": "user", "content": content}]

    model, processor = _load_runtime()
    encoded = processor.apply_chat_template(
        messages,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
        add_generation_prompt=True,
        enable_thinking=thinking,
    )
    inputs = encoded.to(model.device)
    # Number of prompt tokens; everything after this index is newly generated.
    prompt_token_count = inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        generated = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=False,
        )

    new_tokens = generated[0][prompt_token_count:]
    response = processor.decode(new_tokens, skip_special_tokens=False)
    # Use the processor's structured parser when it offers one.
    if hasattr(processor, "parse_response"):
        parsed = processor.parse_response(response)
    else:
        parsed = response
    return _extract_response_text(parsed), _format_json(parsed)


def run_text(prompt: str, thinking: bool, max_new_tokens: int) -> tuple[str, str]:
    """Handle the Text tab: forward a text-only request to ``run_request``."""
    request = {
        "prompt": prompt,
        "thinking": thinking,
        "max_new_tokens": max_new_tokens,
    }
    return run_request(**request)


def run_image(
    prompt: str,
    image_upload: str | None,
    image_url: str,
    thinking: bool,
    max_new_tokens: int,
) -> tuple[str, str]:
    """Handle the Image tab: forward the prompt plus image inputs to ``run_request``."""
    request = {
        "prompt": prompt,
        "thinking": thinking,
        "max_new_tokens": max_new_tokens,
        "image_upload": image_upload,
        "image_url": image_url,
    }
    return run_request(**request)


def run_audio(
    prompt: str,
    audio_upload: str | None,
    audio_url: str,
    thinking: bool,
    max_new_tokens: int,
) -> tuple[str, str]:
    """Handle the Audio tab: forward the prompt plus audio inputs to ``run_request``."""
    request = {
        "prompt": prompt,
        "thinking": thinking,
        "max_new_tokens": max_new_tokens,
        "audio_upload": audio_upload,
        "audio_url": audio_url,
    }
    return run_request(**request)


def _render_showcase_sample(
    *,
    prompt: str,
    response: str,
    parsed_output: object,
    media_markdown: str | None = None,
    media_url: str | None = None,
) -> None:
    """Add the read-only widgets for one showcase example to the current layout.

    Components are created in this order: the optional media markdown, the
    optional asset URL box, the example prompt, the example response, and the
    parsed output rendered as JSON.
    """
    if media_markdown:
        gr.Markdown(media_markdown)
    if media_url:
        gr.Textbox(label="Sample Asset URL", value=media_url, interactive=False, lines=1)

    # (label, text, visible line count) for the two read-only text boxes.
    text_boxes = (
        ("Example Prompt", prompt, 3),
        ("Example Response", response, 6),
    )
    for box_label, box_text, line_count in text_boxes:
        gr.Textbox(label=box_label, value=box_text, interactive=False, lines=line_count)

    parsed_json = _format_json(parsed_output)
    gr.Code(label="Example Parsed Output", value=parsed_json, language="json")


def _build_live_ui() -> None:
    """Add the interactive Text, Image and Audio tabs to the current layout.

    Each tab holds a prompt box, its media inputs (image and audio tabs only),
    a row of generation controls, a run button, and the two output widgets.
    The button is wired to the matching ``run_*`` handler.
    """

    def _add_generation_controls():
        # One row holding the reasoning toggle and the token-budget slider.
        with gr.Row():
            reasoning_toggle = gr.Checkbox(label="Enable Reasoning", value=False)
            token_budget = gr.Slider(label="Max New Tokens", minimum=16, maximum=256, value=64, step=16)
        return reasoning_toggle, token_budget

    def _add_run_section(button_label, handler, handler_inputs):
        # Run button followed by the response box and the parsed-output view.
        run_button = gr.Button(button_label, variant="primary")
        answer_box = gr.Textbox(label="Response", lines=8)
        parsed_view = gr.Code(label="Parsed Output", language="json")
        run_button.click(
            handler,
            inputs=handler_inputs,
            outputs=[answer_box, parsed_view],
        )

    gr.Markdown(
        f"# {MODEL_TITLE}\n\n"
        "Live multimodal demo mode is active because GPU hardware is available. "
        "The LumynaX identity comes from the packaged model template and is not user-editable here."
    )

    with gr.Tab("Text"):
        prompt_box = gr.Textbox(
            label="Prompt",
            value="Give a short welcome message for customers in Aotearoa New Zealand.",
            lines=4,
        )
        reasoning_toggle, token_budget = _add_generation_controls()
        _add_run_section(
            "Run Text Demo",
            run_text,
            [prompt_box, reasoning_toggle, token_budget],
        )

    with gr.Tab("Image"):
        prompt_box = gr.Textbox(
            label="Prompt",
            value="What is shown in this image? Reply in under 12 words.",
            lines=3,
        )
        upload_widget = gr.Image(label="Upload Image", type="filepath")
        url_box = gr.Textbox(label="Or Image URL", value=DEFAULT_IMAGE_URL)
        reasoning_toggle, token_budget = _add_generation_controls()
        _add_run_section(
            "Run Image Demo",
            run_image,
            [prompt_box, upload_widget, url_box, reasoning_toggle, token_budget],
        )

    with gr.Tab("Audio"):
        prompt_box = gr.Textbox(
            label="Prompt",
            value="Transcribe the speech in one line only.",
            lines=3,
        )
        upload_widget = gr.Audio(label="Upload Audio", type="filepath")
        url_box = gr.Textbox(label="Or Audio URL", value=DEFAULT_AUDIO_URL)
        reasoning_toggle, token_budget = _add_generation_controls()
        _add_run_section(
            "Run Audio Demo",
            run_audio,
            [prompt_box, upload_widget, url_box, reasoning_toggle, token_budget],
        )


def _build_showcase_ui() -> None:
    """Add the static showcase tabs to the current layout.

    The layout is a header, an Overview tab, one tab per entry of
    ``SHOWCASE_SAMPLES`` (text, image, audio, reasoning), and a "Run It" tab
    with quickstart commands. Nothing here invokes the model.
    """
    gr.Markdown(
        f"# {MODEL_TITLE}\n\n"
        f"{SHOWCASE_MESSAGE}\n\n"
        "This is still the real package identity and real package structure, but not live inference on this CPU-only Space."
    )

    with gr.Tab("Overview"):
        gr.Markdown(
            "### What this Space is showing\n"
            "- verified text, image, audio, and reasoning examples from package validation\n"
            "- the real packaged Gemma E4B release structure and LumynaX identity behavior\n"
            "- honest provenance: packaged upstream Gemma weights under a LumynaX runtime identity\n\n"
            "### Why this is showcase mode\n"
            "- Hugging Face `cpu-basic` cannot serve this checkpoint interactively\n"
            "- the same Space will switch to live inference automatically if GPU hardware is added later"
        )

    # (tab title, SHOWCASE_SAMPLES key, extra media keyword arguments)
    sample_tabs = (
        ("Text Sample", "text", {}),
        (
            "Image Sample",
            "image",
            {
                "media_markdown": f"![Bundled sample image]({DEFAULT_IMAGE_URL})",
                "media_url": DEFAULT_IMAGE_URL,
            },
        ),
        ("Audio Sample", "audio", {"media_url": DEFAULT_AUDIO_URL}),
        ("Reasoning Note", "reasoning", {}),
    )
    for tab_title, sample_key, media_kwargs in sample_tabs:
        with gr.Tab(tab_title):
            sample = SHOWCASE_SAMPLES[sample_key]
            _render_showcase_sample(
                prompt=sample["prompt"],
                response=sample["response"],
                parsed_output=sample["parsed_output"],
                **media_kwargs,
            )

    with gr.Tab("Run It"):
        gr.Markdown(
            "### Local or GPU-backed run\n"
            "Use the packaged files directly for a real interactive run, or attach GPU hardware to this Space."
        )
        quickstart_commands = (
            "pip install -r requirements.txt\n"
            "python quickstart.py\n"
            "python quickstart.py --mode image --image path-or-url\n"
            "python quickstart.py --mode audio --audio path-or-url"
        )
        gr.Textbox(
            label="Quickstart",
            interactive=False,
            lines=4,
            value=quickstart_commands,
        )


# The interface is assembled once, at import time: the live tabs when a CUDA
# device is visible, the static showcase tabs otherwise.
with gr.Blocks() as demo:
    (_build_live_ui if _has_supported_gpu_runtime() else _build_showcase_ui)()


if __name__ == "__main__":
    # Enable request queuing, then start the server with error display turned on.
    demo.queue().launch(show_error=True)
初始化项目，由ModelHub XC社区提供模型 Model: AbteeXAILab/lumynax-infused-qwen3-text-gguf Source: Original Platform 2026-06-06 09:18:19 +08:00			`from __future__ import annotations`

			`import json`
			`import os`
			`from pathlib import Path`
			`from threading import Lock`

			`import gradio as gr`
			`import torch`
			`from huggingface_hub import snapshot_download`
			`from transformers import AutoModelForMultimodalLM, AutoProcessor`

			`MODEL_TITLE = "LumynaX Infused Qwen3 Text GGUF"`
			`DEFAULT_MODEL_REPO_ID = "AbteeXAILab/lumynax-infused-qwen3-text-gguf"`
			`MODEL_REPO_ENV_VAR = "LUMYNAX_MODEL_REPO_ID"`
			`HF_TOKEN_ENV_VARS = ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN")`
			`DEFAULT_IMAGE_URL = "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/GoldenGate.png"`
			`DEFAULT_AUDIO_URL = "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/journal1.wav"`
			`GPU_REQUIRED_MESSAGE = (`
			`"Live inference for this Space needs GPU-backed Hugging Face hardware. "`
			`"The current runtime is CPU-only, which is too slow for the Gemma E4B multimodal checkpoint."`
			`)`
			`SHOWCASE_MESSAGE = (`
			`"This Space is running in showcase mode on CPU hardware. "`
			`"The examples below were captured during package validation so people can still see how the model behaves. "`
			`"If GPU hardware is attached later, this same Space will switch back to live inference automatically."`
			`)`
			`SHOWCASE_SAMPLES = {`
			`"text": {`
			`"prompt": "Who are you? Reply in one short sentence.",`
			`"response": "I am LumynaX, operating from the LumynaX Infused Gemma E4B Model package.",`
			`"parsed_output": {`
			`"role": "assistant",`
			`"content": "I am LumynaX, operating from the LumynaX Infused Gemma E4B Model package.",`
			`},`
			`},`
			`"image": {`
			`"prompt": "What is shown in this image? Reply in under 12 words.",`
			`"response": "The iconic Golden Gate Bridge spans the water under a clear sky. I am LumynaX.",`
			`"parsed_output": {`
			`"role": "assistant",`
			`"content": "The iconic Golden Gate Bridge spans the water under a clear sky. I am LumynaX.",`
			`},`
			`},`
			`"audio": {`
			`"prompt": "Transcribe the speech in one line only.",`
			`"response": 'A local validation run transcribed the bundled sample audio and included: "My name is LumynaX."',`
			`"parsed_output": {`
			`"validation_summary": 'A local validation run transcribed the bundled sample audio and included: "My name is LumynaX."',`
			`},`
			`},`
			`"reasoning": {`
			`"prompt": "Explain what this package is in one short sentence.",`
			`"response": "Reasoning mode was verified locally and returned a non-empty structured thinking field.",`
			`"parsed_output": {`
			`"validation_summary": "Reasoning mode was verified locally and returned a non-empty structured thinking field.",`
			`},`
			`},`
			`}`

			`_MODEL = None`
			`_PROCESSOR = None`
			`_LOAD_ERROR = None`
			`_LOAD_LOCK = Lock()`


			`def _resolve_hf_token() -> str \| None:`
			`for env_var in HF_TOKEN_ENV_VARS:`
			`raw_value = os.environ.get(env_var, "").strip()`
			`if raw_value:`
			`return raw_value`
			`return None`


			`def _has_supported_gpu_runtime() -> bool:`
			`return bool(torch.cuda.is_available())`


			`def _load_runtime() -> tuple[object, object]:`
			`global _MODEL, _PROCESSOR, _LOAD_ERROR`

			`if _MODEL is not None and _PROCESSOR is not None:`
			`return _MODEL, _PROCESSOR`
			`if _LOAD_ERROR is not None:`
			`raise RuntimeError(_LOAD_ERROR)`

			`with _LOAD_LOCK:`
			`if _MODEL is not None and _PROCESSOR is not None:`
			`return _MODEL, _PROCESSOR`
			`if _LOAD_ERROR is not None:`
			`raise RuntimeError(_LOAD_ERROR)`

			`try:`
			`if not _has_supported_gpu_runtime():`
			`raise RuntimeError(GPU_REQUIRED_MESSAGE)`
			`repo_id = os.environ.get(MODEL_REPO_ENV_VAR, "").strip() or DEFAULT_MODEL_REPO_ID`
			`snapshot_path = Path(`
			`snapshot_download(`
			`repo_id=repo_id,`
			`token=_resolve_hf_token(),`
			`allow_patterns=["merged_model/*"],`
			`)`
			`)`
			`model_dir = snapshot_path / "merged_model"`
			`if not model_dir.exists():`
			`raise FileNotFoundError(f"Expected merged_model/ in {snapshot_path} after downloading {repo_id}.")`

			`processor = AutoProcessor.from_pretrained(str(model_dir))`
			`model = AutoModelForMultimodalLM.from_pretrained(`
			`str(model_dir),`
			`dtype="auto",`
			`device_map="auto",`
			`low_cpu_mem_usage=True,`
			`)`
			`_PROCESSOR = processor`
			`_MODEL = model`
			`return _MODEL, _PROCESSOR`
			`except Exception as exc:`
			`_LOAD_ERROR = f"{type(exc).__name__}: {exc}"`
			`raise`


			`def _resolve_media_reference(upload_value: str \| None, url_value: str \| None) -> str \| None:`
			`if isinstance(url_value, str) and url_value.strip():`
			`return url_value.strip()`
			`if isinstance(upload_value, str) and upload_value.strip():`
			`return upload_value.strip()`
			`return None`


			`def _extract_response_text(parsed: object) -> str:`
			`if isinstance(parsed, dict):`
			`content = parsed.get("content")`
			`if isinstance(content, str) and content.strip():`
			`return content.strip()`
			`if isinstance(parsed, str):`
			`return parsed.strip()`
			`return json.dumps(parsed, indent=2, ensure_ascii=False, default=str)`


			`def _format_json(value: object) -> str:`
			`return json.dumps(value, indent=2, ensure_ascii=False, default=str)`


			`def run_request(`
			`*,`
			`prompt: str,`
			`thinking: bool,`
			`max_new_tokens: int,`
			`image_upload: str \| None = None,`
			`image_url: str = "",`
			`audio_upload: str \| None = None,`
			`audio_url: str = "",`
			`) -> tuple[str, str]:`
			`if not prompt.strip():`
			`raise gr.Error("A prompt is required.")`

			`if not _has_supported_gpu_runtime():`
			`return GPU_REQUIRED_MESSAGE, _format_json({"error": GPU_REQUIRED_MESSAGE})`

			`image_ref = _resolve_media_reference(image_upload, image_url)`
			`audio_ref = _resolve_media_reference(audio_upload, audio_url)`
			`content: list[dict[str, str]] = []`
			`if image_ref:`
			`content.append({"type": "image", "url": image_ref})`
			`if audio_ref:`
			`content.append({"type": "audio", "audio": audio_ref})`
			`content.append({"type": "text", "text": prompt.strip()})`

			`messages = [`
			`{`
			`"role": "user",`
			`"content": content,`
			`},`
			`]`

			`model, processor = _load_runtime()`
			`inputs = processor.apply_chat_template(`
			`messages,`
			`tokenize=True,`
			`return_dict=True,`
			`return_tensors="pt",`
			`add_generation_prompt=True,`
			`enable_thinking=thinking,`
			`).to(model.device)`
			`input_len = inputs["input_ids"].shape[-1]`

			`with torch.inference_mode():`
			`outputs = model.generate(`
			`**inputs,`
			`max_new_tokens=int(max_new_tokens),`
			`do_sample=False,`
			`)`

			`response = processor.decode(outputs[0][input_len:], skip_special_tokens=False)`
			`parsed = processor.parse_response(response) if hasattr(processor, "parse_response") else response`
			`return _extract_response_text(parsed), _format_json(parsed)`


			`def run_text(prompt: str, thinking: bool, max_new_tokens: int) -> tuple[str, str]:`
			`return run_request(`
			`prompt=prompt,`
			`thinking=thinking,`
			`max_new_tokens=max_new_tokens,`
			`)`


			`def run_image(`
			`prompt: str,`
			`image_upload: str \| None,`
			`image_url: str,`
			`thinking: bool,`
			`max_new_tokens: int,`
			`) -> tuple[str, str]:`
			`return run_request(`
			`prompt=prompt,`
			`thinking=thinking,`
			`max_new_tokens=max_new_tokens,`
			`image_upload=image_upload,`
			`image_url=image_url,`
			`)`


			`def run_audio(`
			`prompt: str,`
			`audio_upload: str \| None,`
			`audio_url: str,`
			`thinking: bool,`
			`max_new_tokens: int,`
			`) -> tuple[str, str]:`
			`return run_request(`
			`prompt=prompt,`
			`thinking=thinking,`
			`max_new_tokens=max_new_tokens,`
			`audio_upload=audio_upload,`
			`audio_url=audio_url,`
			`)`


			`def _render_showcase_sample(`
			`*,`
			`prompt: str,`
			`response: str,`
			`parsed_output: object,`
			`media_markdown: str \| None = None,`
			`media_url: str \| None = None,`
			`) -> None:`
			`if media_markdown:`
			`gr.Markdown(media_markdown)`
			`if media_url:`
			`gr.Textbox(label="Sample Asset URL", value=media_url, interactive=False, lines=1)`
			`gr.Textbox(label="Example Prompt", value=prompt, interactive=False, lines=3)`
			`gr.Textbox(label="Example Response", value=response, interactive=False, lines=6)`
			`gr.Code(label="Example Parsed Output", value=_format_json(parsed_output), language="json")`


			`def _build_live_ui() -> None:`
			`gr.Markdown(`
			`f"# {MODEL_TITLE}\n\n"`
			`"Live multimodal demo mode is active because GPU hardware is available. "`
			`"The LumynaX identity comes from the packaged model template and is not user-editable here."`
			`)`
			`with gr.Tab("Text"):`
			`text_prompt = gr.Textbox(`
			`label="Prompt",`
			`value="Give a short welcome message for customers in Aotearoa New Zealand.",`
			`lines=4,`
			`)`
			`with gr.Row():`
			`text_thinking = gr.Checkbox(label="Enable Reasoning", value=False)`
			`text_max_tokens = gr.Slider(label="Max New Tokens", minimum=16, maximum=256, value=64, step=16)`
			`text_run = gr.Button("Run Text Demo", variant="primary")`
			`text_answer = gr.Textbox(label="Response", lines=8)`
			`text_debug = gr.Code(label="Parsed Output", language="json")`
			`text_run.click(`
			`run_text,`
			`inputs=[text_prompt, text_thinking, text_max_tokens],`
			`outputs=[text_answer, text_debug],`
			`)`

			`with gr.Tab("Image"):`
			`image_prompt = gr.Textbox(`
			`label="Prompt",`
			`value="What is shown in this image? Reply in under 12 words.",`
			`lines=3,`
			`)`
			`image_upload = gr.Image(label="Upload Image", type="filepath")`
			`image_url = gr.Textbox(label="Or Image URL", value=DEFAULT_IMAGE_URL)`
			`with gr.Row():`
			`image_thinking = gr.Checkbox(label="Enable Reasoning", value=False)`
			`image_max_tokens = gr.Slider(label="Max New Tokens", minimum=16, maximum=256, value=64, step=16)`
			`image_run = gr.Button("Run Image Demo", variant="primary")`
			`image_answer = gr.Textbox(label="Response", lines=8)`
			`image_debug = gr.Code(label="Parsed Output", language="json")`
			`image_run.click(`
			`run_image,`
			`inputs=[image_prompt, image_upload, image_url, image_thinking, image_max_tokens],`
			`outputs=[image_answer, image_debug],`
			`)`

			`with gr.Tab("Audio"):`
			`audio_prompt = gr.Textbox(`
			`label="Prompt",`
			`value="Transcribe the speech in one line only.",`
			`lines=3,`
			`)`
			`audio_upload = gr.Audio(label="Upload Audio", type="filepath")`
			`audio_url = gr.Textbox(label="Or Audio URL", value=DEFAULT_AUDIO_URL)`
			`with gr.Row():`
			`audio_thinking = gr.Checkbox(label="Enable Reasoning", value=False)`
			`audio_max_tokens = gr.Slider(label="Max New Tokens", minimum=16, maximum=256, value=64, step=16)`
			`audio_run = gr.Button("Run Audio Demo", variant="primary")`
			`audio_answer = gr.Textbox(label="Response", lines=8)`
			`audio_debug = gr.Code(label="Parsed Output", language="json")`
			`audio_run.click(`
			`run_audio,`
			`inputs=[audio_prompt, audio_upload, audio_url, audio_thinking, audio_max_tokens],`
			`outputs=[audio_answer, audio_debug],`
			`)`


			`def _build_showcase_ui() -> None:`
			`gr.Markdown(`
			`f"# {MODEL_TITLE}\n\n"`
			`f"{SHOWCASE_MESSAGE}\n\n"`
			`"This is still the real package identity and real package structure, but not live inference on this CPU-only Space."`
			`)`
			`with gr.Tab("Overview"):`
			`gr.Markdown(`
			`"### What this Space is showing\n"`
			`"- verified text, image, audio, and reasoning examples from package validation\n"`
			`"- the real packaged Gemma E4B release structure and LumynaX identity behavior\n"`
			`"- honest provenance: packaged upstream Gemma weights under a LumynaX runtime identity\n\n"`
			`"### Why this is showcase mode\n"`
			"- Hugging Face `cpu-basic` cannot serve this checkpoint interactively\n"
			`"- the same Space will switch to live inference automatically if GPU hardware is added later"`
			`)`
			`with gr.Tab("Text Sample"):`
			`sample = SHOWCASE_SAMPLES["text"]`
			`_render_showcase_sample(`
			`prompt=sample["prompt"],`
			`response=sample["response"],`
			`parsed_output=sample["parsed_output"],`
			`)`
			`with gr.Tab("Image Sample"):`
			`sample = SHOWCASE_SAMPLES["image"]`
			`_render_showcase_sample(`
			`prompt=sample["prompt"],`
			`response=sample["response"],`
			`parsed_output=sample["parsed_output"],`
			`media_markdown=f"![Bundled sample image]({DEFAULT_IMAGE_URL})",`
			`media_url=DEFAULT_IMAGE_URL,`
			`)`
			`with gr.Tab("Audio Sample"):`
			`sample = SHOWCASE_SAMPLES["audio"]`
			`_render_showcase_sample(`
			`prompt=sample["prompt"],`
			`response=sample["response"],`
			`parsed_output=sample["parsed_output"],`
			`media_url=DEFAULT_AUDIO_URL,`
			`)`
			`with gr.Tab("Reasoning Note"):`
			`sample = SHOWCASE_SAMPLES["reasoning"]`
			`_render_showcase_sample(`
			`prompt=sample["prompt"],`
			`response=sample["response"],`
			`parsed_output=sample["parsed_output"],`
			`)`
			`with gr.Tab("Run It"):`
			`gr.Markdown(`
			`"### Local or GPU-backed run\n"`
			`"Use the packaged files directly for a real interactive run, or attach GPU hardware to this Space."`
			`)`
			`gr.Textbox(`
			`label="Quickstart",`
			`interactive=False,`
			`lines=4,`
			`value=(`
			`"pip install -r requirements.txt\n"`
			`"python quickstart.py\n"`
			`"python quickstart.py --mode image --image path-or-url\n"`
			`"python quickstart.py --mode audio --audio path-or-url"`
			`),`
			`)`


			`with gr.Blocks() as demo:`
			`if _has_supported_gpu_runtime():`
			`_build_live_ui()`
			`else:`
			`_build_showcase_ui()`


			`if __name__ == "__main__":`
			`demo.queue().launch(show_error=True)`