初始化项目,由ModelHub XC社区提供模型
Model: AbteeXAILab/lumynax-infused-qwen3-text-gguf Source: Original Platform
This commit is contained in:
35
hf_space/README.md
Normal file
35
hf_space/README.md
Normal file
@@ -0,0 +1,35 @@
---
title: LumynaX Infused Qwen3 Text GGUF Demo
colorFrom: green
colorTo: blue
sdk: gradio
app_file: app.py
pinned: false
short_description: Private LumynaX Gemma E4B demo.
---
|
||||
|
||||
# LumynaX Infused Qwen3 Text GGUF Demo
|
||||
|
||||
Private demo for the `lumynax-infused-qwen3-text-gguf` release line.
|
||||
|
||||
## Supported Demo Modes
|
||||
|
||||
- text with reasoning toggle
|
||||
- image understanding from upload or URL
|
||||
- audio understanding / transcription from upload or URL
|
||||
|
||||
## Private Deployment Notes
|
||||
|
||||
- this Space is intended to stay private for now
|
||||
- the backing model repo should be `AbteeXAILab/lumynax-infused-qwen3-text-gguf`
|
||||
- if that model repo is private, set an `HF_TOKEN` Space secret with read access
|
||||
- on CPU-only Hugging Face hardware this Space automatically falls back to showcase mode instead of live inference
|
||||
- if GPU hardware is later attached, the same Space switches back to live multimodal inference
|
||||
- the package chat template already hardcodes the LumynaX identity inside `merged_model/chat_template.jinja`
|
||||
- live inference for this Gemma E4B package still requires GPU-backed Space hardware; `cpu-basic` is not sufficient
|
||||
|
||||
## Important Provenance
|
||||
|
||||
This demo is branded as `LumynaX Infused Qwen3 Text GGUF`, but it serves the official upstream
`google/gemma-4-E4B-it` base weights packaged under the LumynaX release identity.
It does not claim a private LumynaX fine-tune of the checkpoint.
|
||||
395
hf_space/app.py
Normal file
395
hf_space/app.py
Normal file
@@ -0,0 +1,395 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from threading import Lock
|
||||
|
||||
import gradio as gr
|
||||
import torch
|
||||
from huggingface_hub import snapshot_download
|
||||
from transformers import AutoModelForMultimodalLM, AutoProcessor
|
||||
|
||||
# Heading shown at the top of both the live and the showcase UI.
MODEL_TITLE = "LumynaX Infused Qwen3 Text GGUF"
# Hub repo downloaded when the override environment variable is unset or blank.
DEFAULT_MODEL_REPO_ID = "AbteeXAILab/lumynax-infused-qwen3-text-gguf"
# Name of the environment variable that may override the model repo id.
MODEL_REPO_ENV_VAR = "LUMYNAX_MODEL_REPO_ID"
# Environment variables searched, in this order, for a Hub access token.
HF_TOKEN_ENV_VARS = ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN")
# Sample assets pre-filled in the live UI and linked from the showcase UI.
DEFAULT_IMAGE_URL = "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/GoldenGate.png"
DEFAULT_AUDIO_URL = "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/journal1.wav"
# Returned to the user (and recorded as the load error) when no CUDA device exists.
GPU_REQUIRED_MESSAGE = (
    "Live inference for this Space needs GPU-backed Hugging Face hardware. "
    "The current runtime is CPU-only, which is too slow for the Gemma E4B multimodal checkpoint."
)
# Banner text for the static showcase UI.
SHOWCASE_MESSAGE = (
    "This Space is running in showcase mode on CPU hardware. "
    "The examples below were captured during package validation so people can still see how the model behaves. "
    "If GPU hardware is attached later, this same Space will switch back to live inference automatically."
)
# Canned prompt/response pairs rendered by the showcase tabs, keyed by modality.
# Each entry has "prompt", "response" and "parsed_output" keys.
SHOWCASE_SAMPLES = {
    "text": {
        "prompt": "Who are you? Reply in one short sentence.",
        "response": "I am LumynaX, operating from the LumynaX Infused Gemma E4B Model package.",
        "parsed_output": {
            "role": "assistant",
            "content": "I am LumynaX, operating from the LumynaX Infused Gemma E4B Model package.",
        },
    },
    "image": {
        "prompt": "What is shown in this image? Reply in under 12 words.",
        "response": "The iconic Golden Gate Bridge spans the water under a clear sky. I am LumynaX.",
        "parsed_output": {
            "role": "assistant",
            "content": "The iconic Golden Gate Bridge spans the water under a clear sky. I am LumynaX.",
        },
    },
    "audio": {
        "prompt": "Transcribe the speech in one line only.",
        "response": 'A local validation run transcribed the bundled sample audio and included: "My name is LumynaX."',
        "parsed_output": {
            "validation_summary": 'A local validation run transcribed the bundled sample audio and included: "My name is LumynaX."',
        },
    },
    "reasoning": {
        "prompt": "Explain what this package is in one short sentence.",
        "response": "Reasoning mode was verified locally and returned a non-empty structured thinking field.",
        "parsed_output": {
            "validation_summary": "Reasoning mode was verified locally and returned a non-empty structured thinking field.",
        },
    },
}

# Lazily populated runtime cache; written only by _load_runtime().
_MODEL = None
_PROCESSOR = None
# "<ExceptionType>: <message>" of the first failed load attempt, if any.
_LOAD_ERROR = None
# Serialises the first model load across concurrent requests.
_LOAD_LOCK = Lock()
|
||||
|
||||
|
||||
def _resolve_hf_token() -> str | None:
    """Return the first non-blank Hub token found in the environment.

    The variables named in ``HF_TOKEN_ENV_VARS`` are checked in order; values
    are stripped of surrounding whitespace before being tested.

    Returns:
        The stripped token, or ``None`` when every variable is unset or blank.
    """
    for env_var in HF_TOKEN_ENV_VARS:
        raw_value = os.environ.get(env_var, "").strip()
        if raw_value:
            return raw_value
    return None
|
||||
|
||||
|
||||
def _has_supported_gpu_runtime() -> bool:
    """Return ``True`` when PyTorch reports that CUDA is available."""
    return bool(torch.cuda.is_available())
|
||||
|
||||
|
||||
def _load_runtime() -> tuple[object, object]:
    """Return the cached ``(model, processor)`` pair, loading it on first use.

    On the first call the ``merged_model/`` folder of the configured Hub repo
    is downloaded and the processor and model are instantiated from it. The
    pair is then kept in module globals so later calls return immediately.

    A failed attempt is remembered as well: its message is stored in
    ``_LOAD_ERROR`` and every later call raises ``RuntimeError`` with that
    message instead of retrying.

    Returns:
        ``(model, processor)``.

    Raises:
        RuntimeError: no CUDA device is available, or an earlier load attempt
            already failed.
        FileNotFoundError: the downloaded snapshot has no ``merged_model/``.
        Exception: anything raised by the download or the ``from_pretrained``
            calls is recorded and re-raised unchanged.
    """
    global _MODEL, _PROCESSOR, _LOAD_ERROR

    # Fast path: answer from the cache without taking the lock.
    if _MODEL is not None and _PROCESSOR is not None:
        return _MODEL, _PROCESSOR
    if _LOAD_ERROR is not None:
        raise RuntimeError(_LOAD_ERROR)

    with _LOAD_LOCK:
        # Re-check under the lock: another request may have finished (or
        # failed) the load while this one was waiting.
        if _MODEL is not None and _PROCESSOR is not None:
            return _MODEL, _PROCESSOR
        if _LOAD_ERROR is not None:
            raise RuntimeError(_LOAD_ERROR)

        try:
            if not _has_supported_gpu_runtime():
                raise RuntimeError(GPU_REQUIRED_MESSAGE)
            # A blank override falls back to the default repo.
            repo_id = os.environ.get(MODEL_REPO_ENV_VAR, "").strip() or DEFAULT_MODEL_REPO_ID
            snapshot_path = Path(
                snapshot_download(
                    repo_id=repo_id,
                    token=_resolve_hf_token(),
                    # Only the merged checkpoint folder is needed here.
                    allow_patterns=["merged_model/*"],
                )
            )
            model_dir = snapshot_path / "merged_model"
            if not model_dir.exists():
                raise FileNotFoundError(f"Expected merged_model/ in {snapshot_path} after downloading {repo_id}.")

            processor = AutoProcessor.from_pretrained(str(model_dir))
            model = AutoModelForMultimodalLM.from_pretrained(
                str(model_dir),
                dtype="auto",
                device_map="auto",
                low_cpu_mem_usage=True,
            )
            # Publish both globals only after both objects exist.
            _PROCESSOR = processor
            _MODEL = model
            return _MODEL, _PROCESSOR
        except Exception as exc:
            # Remember the failure so later requests fail fast with the same
            # message; the original exception still propagates to this caller.
            _LOAD_ERROR = f"{type(exc).__name__}: {exc}"
            raise
|
||||
|
||||
|
||||
def _resolve_media_reference(upload_value: str | None, url_value: str | None) -> str | None:
    """Pick the media reference to send to the model.

    An uploaded file takes precedence over the URL field. The live UI
    pre-fills the URL boxes with sample URLs, so preferring the URL would
    silently discard any file the user uploaded without also clearing the box.

    Args:
        upload_value: Local file path from the upload widget, or ``None``.
        url_value: Text from the URL box, possibly blank.

    Returns:
        The stripped upload path if present, otherwise the stripped URL,
        otherwise ``None`` when both are missing, blank, or not strings.
    """
    for candidate in (upload_value, url_value):
        if isinstance(candidate, str):
            cleaned = candidate.strip()
            if cleaned:
                return cleaned
    return None
|
||||
|
||||
|
||||
def _extract_response_text(parsed: object) -> str:
    """Reduce a parsed model response to the text shown in the answer box.

    Args:
        parsed: Either a plain string or a structure such as a dict.

    Returns:
        * the stripped ``"content"`` value when ``parsed`` is a dict whose
          ``"content"`` is a non-blank string;
        * the stripped string when ``parsed`` is itself a string;
        * otherwise ``parsed`` rendered as indented JSON, with values that are
          not JSON-serialisable converted through ``str``.
    """
    if isinstance(parsed, dict):
        content = parsed.get("content")
        if isinstance(content, str) and content.strip():
            return content.strip()
    if isinstance(parsed, str):
        return parsed.strip()
    # Nothing directly displayable: show the whole structure instead.
    return json.dumps(parsed, indent=2, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def _format_json(value: object) -> str:
    """Render ``value`` as two-space-indented JSON.

    Non-ASCII characters are kept as-is, and values that are not
    JSON-serialisable are converted through ``str`` instead of raising.
    """
    return json.dumps(value, indent=2, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def run_request(
    *,
    prompt: str,
    thinking: bool,
    max_new_tokens: int,
    image_upload: str | None = None,
    image_url: str = "",
    audio_upload: str | None = None,
    audio_url: str = "",
) -> tuple[str, str]:
    """Run one single-turn generation and return display text plus debug JSON.

    Args:
        prompt: User text; must not be blank.
        thinking: Forwarded to the chat template as ``enable_thinking``.
        max_new_tokens: Generation budget; coerced with ``int()``.
        image_upload: Local path of an uploaded image, if any.
        image_url: Image URL, possibly blank.
        audio_upload: Local path of an uploaded audio file, if any.
        audio_url: Audio URL, possibly blank.

    Returns:
        ``(answer_text, parsed_json)``. When no CUDA device is available the
        GPU-required message is returned in both slots and no model is loaded.

    Raises:
        gr.Error: the prompt is missing or blank.
        Exception: whatever ``_load_runtime`` raises when the model cannot be
            loaded.
    """
    # `not prompt` also covers None, so the user sees the intended message
    # rather than an AttributeError.
    if not prompt or not prompt.strip():
        raise gr.Error("A prompt is required.")

    if not _has_supported_gpu_runtime():
        return GPU_REQUIRED_MESSAGE, _format_json({"error": GPU_REQUIRED_MESSAGE})

    image_ref = _resolve_media_reference(image_upload, image_url)
    audio_ref = _resolve_media_reference(audio_upload, audio_url)

    # Media parts come first, followed by the text part.
    content: list[dict[str, str]] = []
    if image_ref:
        content.append({"type": "image", "url": image_ref})
    if audio_ref:
        content.append({"type": "audio", "audio": audio_ref})
    content.append({"type": "text", "text": prompt.strip()})

    messages = [
        {
            "role": "user",
            "content": content,
        },
    ]

    model, processor = _load_runtime()
    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
        add_generation_prompt=True,
        enable_thinking=thinking,
    ).to(model.device)
    # Prompt length, used below to slice the prompt tokens off the output.
    input_len = inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=False,
        )

    # Special tokens are kept in the decoded text; presumably parse_response
    # relies on them to split the output into fields (confirm against the
    # processor in use).
    response = processor.decode(outputs[0][input_len:], skip_special_tokens=False)
    # Processors without parse_response just yield the raw decoded string.
    parsed = processor.parse_response(response) if hasattr(processor, "parse_response") else response
    return _extract_response_text(parsed), _format_json(parsed)
|
||||
|
||||
|
||||
def run_text(prompt: str, thinking: bool, max_new_tokens: int) -> tuple[str, str]:
    """Click handler for the Text tab.

    Adapts the positional widget values to the keyword-only ``run_request``
    and returns its ``(answer_text, parsed_json)`` result unchanged.
    """
    return run_request(
        prompt=prompt,
        thinking=thinking,
        max_new_tokens=max_new_tokens,
    )
|
||||
|
||||
|
||||
def run_image(
    prompt: str,
    image_upload: str | None,
    image_url: str,
    thinking: bool,
    max_new_tokens: int,
) -> tuple[str, str]:
    """Click handler for the Image tab.

    Adapts the positional widget values to the keyword-only ``run_request``,
    passing the image upload path and image URL, and returns its
    ``(answer_text, parsed_json)`` result unchanged.
    """
    return run_request(
        prompt=prompt,
        thinking=thinking,
        max_new_tokens=max_new_tokens,
        image_upload=image_upload,
        image_url=image_url,
    )
|
||||
|
||||
|
||||
def run_audio(
    prompt: str,
    audio_upload: str | None,
    audio_url: str,
    thinking: bool,
    max_new_tokens: int,
) -> tuple[str, str]:
    """Click handler for the Audio tab.

    Adapts the positional widget values to the keyword-only ``run_request``,
    passing the audio upload path and audio URL, and returns its
    ``(answer_text, parsed_json)`` result unchanged.
    """
    return run_request(
        prompt=prompt,
        thinking=thinking,
        max_new_tokens=max_new_tokens,
        audio_upload=audio_upload,
        audio_url=audio_url,
    )
|
||||
|
||||
|
||||
def _render_showcase_sample(
    *,
    prompt: str,
    response: str,
    parsed_output: object,
    media_markdown: str | None = None,
    media_url: str | None = None,
) -> None:
    """Add the read-only widgets for one canned example to the current layout.

    Components are created in this order: optional markdown for the media,
    optional read-only box with the asset URL, the example prompt, the example
    response, and the parsed output as JSON. Must be called while a Gradio
    layout context is open.

    Args:
        prompt: Example prompt text.
        response: Example response text.
        parsed_output: Structure shown as indented JSON.
        media_markdown: Markdown rendered above the example; skipped if falsy.
        media_url: Asset URL shown in a read-only box; skipped if falsy.
    """
    if media_markdown:
        gr.Markdown(media_markdown)
    if media_url:
        gr.Textbox(label="Sample Asset URL", value=media_url, interactive=False, lines=1)
    gr.Textbox(label="Example Prompt", value=prompt, interactive=False, lines=3)
    gr.Textbox(label="Example Response", value=response, interactive=False, lines=6)
    gr.Code(label="Example Parsed Output", value=_format_json(parsed_output), language="json")
|
||||
|
||||
|
||||
def _build_live_ui() -> None:
    """Build the interactive Text / Image / Audio tabs.

    Must be called while a ``gr.Blocks`` context is open. Each tab has a
    prompt box, a reasoning checkbox, a max-new-tokens slider, a run button
    and two outputs (response text and parsed JSON); the button is wired to
    the matching ``run_*`` handler.
    """
    gr.Markdown(
        f"# {MODEL_TITLE}\n\n"
        "Live multimodal demo mode is active because GPU hardware is available. "
        "The LumynaX identity comes from the packaged model template and is not user-editable here."
    )
    with gr.Tab("Text"):
        text_prompt = gr.Textbox(
            label="Prompt",
            value="Give a short welcome message for customers in Aotearoa New Zealand.",
            lines=4,
        )
        with gr.Row():
            text_thinking = gr.Checkbox(label="Enable Reasoning", value=False)
            text_max_tokens = gr.Slider(label="Max New Tokens", minimum=16, maximum=256, value=64, step=16)
        text_run = gr.Button("Run Text Demo", variant="primary")
        text_answer = gr.Textbox(label="Response", lines=8)
        text_debug = gr.Code(label="Parsed Output", language="json")
        # Input order must match run_text's positional parameters.
        text_run.click(
            run_text,
            inputs=[text_prompt, text_thinking, text_max_tokens],
            outputs=[text_answer, text_debug],
        )

    with gr.Tab("Image"):
        image_prompt = gr.Textbox(
            label="Prompt",
            value="What is shown in this image? Reply in under 12 words.",
            lines=3,
        )
        # type="filepath" hands the handler a local path string.
        image_upload = gr.Image(label="Upload Image", type="filepath")
        image_url = gr.Textbox(label="Or Image URL", value=DEFAULT_IMAGE_URL)
        with gr.Row():
            image_thinking = gr.Checkbox(label="Enable Reasoning", value=False)
            image_max_tokens = gr.Slider(label="Max New Tokens", minimum=16, maximum=256, value=64, step=16)
        image_run = gr.Button("Run Image Demo", variant="primary")
        image_answer = gr.Textbox(label="Response", lines=8)
        image_debug = gr.Code(label="Parsed Output", language="json")
        # Input order must match run_image's positional parameters.
        image_run.click(
            run_image,
            inputs=[image_prompt, image_upload, image_url, image_thinking, image_max_tokens],
            outputs=[image_answer, image_debug],
        )

    with gr.Tab("Audio"):
        audio_prompt = gr.Textbox(
            label="Prompt",
            value="Transcribe the speech in one line only.",
            lines=3,
        )
        # type="filepath" hands the handler a local path string.
        audio_upload = gr.Audio(label="Upload Audio", type="filepath")
        audio_url = gr.Textbox(label="Or Audio URL", value=DEFAULT_AUDIO_URL)
        with gr.Row():
            audio_thinking = gr.Checkbox(label="Enable Reasoning", value=False)
            audio_max_tokens = gr.Slider(label="Max New Tokens", minimum=16, maximum=256, value=64, step=16)
        audio_run = gr.Button("Run Audio Demo", variant="primary")
        audio_answer = gr.Textbox(label="Response", lines=8)
        audio_debug = gr.Code(label="Parsed Output", language="json")
        # Input order must match run_audio's positional parameters.
        audio_run.click(
            run_audio,
            inputs=[audio_prompt, audio_upload, audio_url, audio_thinking, audio_max_tokens],
            outputs=[audio_answer, audio_debug],
        )
|
||||
|
||||
|
||||
def _build_showcase_ui() -> None:
    """Build the static, read-only UI used when live inference is unavailable.

    Must be called while a ``gr.Blocks`` context is open. It renders a banner,
    an overview tab, one tab per entry in ``SHOWCASE_SAMPLES`` and a tab with
    quickstart commands. No event handlers are attached.
    """
    gr.Markdown(
        f"# {MODEL_TITLE}\n\n"
        f"{SHOWCASE_MESSAGE}\n\n"
        "This is still the real package identity and real package structure, but not live inference on this CPU-only Space."
    )
    with gr.Tab("Overview"):
        gr.Markdown(
            "### What this Space is showing\n"
            "- verified text, image, audio, and reasoning examples from package validation\n"
            "- the real packaged Gemma E4B release structure and LumynaX identity behavior\n"
            "- honest provenance: packaged upstream Gemma weights under a LumynaX runtime identity\n\n"
            "### Why this is showcase mode\n"
            "- Hugging Face `cpu-basic` cannot serve this checkpoint interactively\n"
            "- the same Space will switch to live inference automatically if GPU hardware is added later"
        )
    with gr.Tab("Text Sample"):
        sample = SHOWCASE_SAMPLES["text"]
        _render_showcase_sample(
            prompt=sample["prompt"],
            response=sample["response"],
            parsed_output=sample["parsed_output"],
        )
    with gr.Tab("Image Sample"):
        sample = SHOWCASE_SAMPLES["image"]
        _render_showcase_sample(
            prompt=sample["prompt"],
            response=sample["response"],
            parsed_output=sample["parsed_output"],
            # Show the sample image inline so the canned response can be
            # compared with it; an empty string here would hide the image.
            media_markdown=f"![Sample image]({DEFAULT_IMAGE_URL})",
            media_url=DEFAULT_IMAGE_URL,
        )
    with gr.Tab("Audio Sample"):
        sample = SHOWCASE_SAMPLES["audio"]
        _render_showcase_sample(
            prompt=sample["prompt"],
            response=sample["response"],
            parsed_output=sample["parsed_output"],
            media_url=DEFAULT_AUDIO_URL,
        )
    with gr.Tab("Reasoning Note"):
        sample = SHOWCASE_SAMPLES["reasoning"]
        _render_showcase_sample(
            prompt=sample["prompt"],
            response=sample["response"],
            parsed_output=sample["parsed_output"],
        )
    with gr.Tab("Run It"):
        gr.Markdown(
            "### Local or GPU-backed run\n"
            "Use the packaged files directly for a real interactive run, or attach GPU hardware to this Space."
        )
        gr.Textbox(
            label="Quickstart",
            interactive=False,
            lines=4,
            value=(
                "pip install -r requirements.txt\n"
                "python quickstart.py\n"
                "python quickstart.py --mode image --image path-or-url\n"
                "python quickstart.py --mode audio --audio path-or-url"
            ),
        )
|
||||
|
||||
|
||||
# The UI is built once at import time. Which layout is used depends on whether
# CUDA is available at that moment; it is not re-evaluated per request.
with gr.Blocks() as demo:
    if _has_supported_gpu_runtime():
        _build_live_ui()
    else:
        _build_showcase_ui()


if __name__ == "__main__":
    # Enable the request queue and show handler errors in the browser.
    demo.queue().launch(show_error=True)
|
||||
10
hf_space/requirements.txt
Normal file
10
hf_space/requirements.txt
Normal file
@@ -0,0 +1,10 @@
accelerate>=1.13
gradio>=5.0
huggingface-hub>=1.8
librosa>=0.11
numba>=0.65
pillow>=10.0
safetensors>=0.6
torch>=2.9
torchvision>=0.24
transformers>=5.5.3
|
||||
Reference in New Issue
Block a user