2025-09-17 19:29:13 +02:00
|
|
|
import { config } from '$lib/stores/settings.svelte';
|
webui: introduce OpenAI-compatible model selector in JSON payload (#16562)
* webui: introduce OpenAI-compatible model selector in JSON payload
* webui: restore OpenAI-Compatible model source of truth and unify metadata capture
This change re-establishes a single, reliable source of truth for the active model:
fully aligned with the OpenAI-Compat API behavior
It introduces a unified metadata flow that captures the model field from both
streaming and non-streaming responses, wiring a new onModel callback through ChatService
The model name is now resolved directly from the API payload rather than relying on
server /props or UI assumptions
ChatStore records and persists the resolved model for each assistant message during
streaming, ensuring consistency across the UI and database
Type definitions for API and settings were also extended to include model metadata
and the onModel callback, completing the alignment with OpenAI-Compat semantics
* webui: address review feedback from allozaur
* webui: move model selector into ChatForm (idea by @allozaur)
* webui: make model selector more subtle and integrated into ChatForm
* webui: replaced the Flowbite selector with a native Svelte dropdown
* webui: add developer setting to toggle the chat model selector
* webui: address review feedback from allozaur
Normalized streamed model names during chat updates
by trimming input and removing directory components before saving
or persisting them, so the conversation UI shows only the filename
Forced model names within the chat form selector dropdown to render as
a single-line, truncated entry with a tooltip revealing the full name
* webui: toggle displayed model source for legacy vs OpenAI-Compat modes
When the selector is disabled, it falls back to the active server model name from /props
When the model selector is enabled, the displayed model comes from the message metadata
(the one explicitly selected and sent in the request)
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/constants/localstorage-keys.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refactor model selector and persistence helpers
- Replace inline portal and event listeners with proper Svelte bindings
- Introduce 'persisted' store helper for localStorage sync without runes
- Extract 'normalizeModelName' utils + Vitest coverage
- Simplify ChatFormModelSelector structure and cleanup logic
Replaced the persisted store helper's use of '$state/$effect' runes with
a plain TS implementation to prevent orphaned effect runtime errors
outside component context
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: document normalizeModelName usage with inline examples
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: extract ModelOption type into dedicated models.d.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refine ChatMessageAssistant displayedModel source logic
* webui: stabilize dropdown, simplify model extraction, and init assistant model field
* chore: update webui static build
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: npm format, update webui static build
* webui: align sidebar trigger position, remove z-index glitch
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
2025-10-22 16:58:23 +02:00
|
|
|
import { selectedModelName } from '$lib/stores/models.svelte';
|
2025-09-17 19:29:13 +02:00
|
|
|
import { slotsService } from './slots';
|
|
|
|
|
/**
|
|
|
|
|
* ChatService - Low-level API communication layer for llama.cpp server interactions
|
|
|
|
|
*
|
|
|
|
|
* This service handles direct communication with the llama.cpp server's chat completion API.
|
|
|
|
|
* It provides the network layer abstraction for AI model interactions while remaining
|
|
|
|
|
* free of conversation state (it keeps only a map of AbortControllers keyed by conversation) and focused purely on API communication.
|
|
|
|
|
*
|
|
|
|
|
* **Architecture & Relationship with ChatStore:**
|
|
|
|
|
* - **ChatService** (this class): API communication layer that holds no conversation state (only per-conversation AbortControllers)
|
|
|
|
|
* - Handles HTTP requests/responses with llama.cpp server
|
|
|
|
|
* - Manages streaming and non-streaming response parsing
|
|
|
|
|
* - Provides request abortion capabilities
|
|
|
|
|
* - Converts database messages to API format
|
2025-10-12 18:06:41 +02:00
|
|
|
* - Handles error translation for server responses
|
2025-09-17 19:29:13 +02:00
|
|
|
*
|
|
|
|
|
* - **ChatStore**: Stateful orchestration and UI state management
|
|
|
|
|
* - Uses ChatService for all AI model communication
|
|
|
|
|
* - Manages conversation state, message history, and UI reactivity
|
|
|
|
|
* - Coordinates with DatabaseStore for persistence
|
|
|
|
|
* - Handles complex workflows like branching and regeneration
|
|
|
|
|
*
|
|
|
|
|
* **Key Responsibilities:**
|
|
|
|
|
* - Message format conversion (DatabaseMessage → API format)
|
|
|
|
|
* - Streaming response handling with real-time callbacks
|
|
|
|
|
* - Reasoning content extraction and processing
|
|
|
|
|
* - File attachment processing (images, PDFs, audio, text)
|
|
|
|
|
* - Request lifecycle management (abort, cleanup)
|
|
|
|
|
*/
|
|
|
|
|
export class ChatService {
|
2025-10-20 12:41:13 +02:00
|
|
|
private abortControllers: Map<string, AbortController> = new Map();
|
2025-09-17 19:29:13 +02:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Sends a chat completion request to the llama.cpp server.
|
|
|
|
|
* Supports both streaming and non-streaming responses with comprehensive parameter configuration.
|
|
|
|
|
* Automatically converts database messages with attachments to the appropriate API format.
|
|
|
|
|
*
|
|
|
|
|
* @param messages - Array of chat messages to send to the API (supports both ApiChatMessageData and DatabaseMessage with attachments)
|
|
|
|
|
* @param options - Configuration options for the chat completion request. See `SettingsChatServiceOptions` type for details.
|
|
|
|
|
* @returns {Promise<string | void>} that resolves to the complete response string (non-streaming) or void (streaming)
|
|
|
|
|
* @throws {Error} if the request fails or is aborted
|
|
|
|
|
*/
|
|
|
|
|
async sendMessage(
|
|
|
|
|
messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[],
|
2025-10-20 12:41:13 +02:00
|
|
|
options: SettingsChatServiceOptions = {},
|
|
|
|
|
conversationId?: string
|
2025-09-17 19:29:13 +02:00
|
|
|
): Promise<string | void> {
|
|
|
|
|
const {
|
|
|
|
|
stream,
|
|
|
|
|
onChunk,
|
|
|
|
|
onComplete,
|
|
|
|
|
onError,
|
webui: introduce OpenAI-compatible model selector in JSON payload (#16562)
* webui: introduce OpenAI-compatible model selector in JSON payload
* webui: restore OpenAI-Compatible model source of truth and unify metadata capture
This change re-establishes a single, reliable source of truth for the active model:
fully aligned with the OpenAI-Compat API behavior
It introduces a unified metadata flow that captures the model field from both
streaming and non-streaming responses, wiring a new onModel callback through ChatService
The model name is now resolved directly from the API payload rather than relying on
server /props or UI assumptions
ChatStore records and persists the resolved model for each assistant message during
streaming, ensuring consistency across the UI and database
Type definitions for API and settings were also extended to include model metadata
and the onModel callback, completing the alignment with OpenAI-Compat semantics
* webui: address review feedback from allozaur
* webui: move model selector into ChatForm (idea by @allozaur)
* webui: make model selector more subtle and integrated into ChatForm
* webui: replaced the Flowbite selector with a native Svelte dropdown
* webui: add developer setting to toggle the chat model selector
* webui: address review feedback from allozaur
Normalized streamed model names during chat updates
by trimming input and removing directory components before saving
or persisting them, so the conversation UI shows only the filename
Forced model names within the chat form selector dropdown to render as
a single-line, truncated entry with a tooltip revealing the full name
* webui: toggle displayed model source for legacy vs OpenAI-Compat modes
When the selector is disabled, it falls back to the active server model name from /props
When the model selector is enabled, the displayed model comes from the message metadata
(the one explicitly selected and sent in the request)
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/constants/localstorage-keys.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refactor model selector and persistence helpers
- Replace inline portal and event listeners with proper Svelte bindings
- Introduce 'persisted' store helper for localStorage sync without runes
- Extract 'normalizeModelName' utils + Vitest coverage
- Simplify ChatFormModelSelector structure and cleanup logic
Replaced the persisted store helper's use of '$state/$effect' runes with
a plain TS implementation to prevent orphaned effect runtime errors
outside component context
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: document normalizeModelName usage with inline examples
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: extract ModelOption type into dedicated models.d.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refine ChatMessageAssistant displayedModel source logic
* webui: stabilize dropdown, simplify model extraction, and init assistant model field
* chore: update webui static build
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: npm format, update webui static build
* webui: align sidebar trigger position, remove z-index glitch
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
2025-10-22 16:58:23 +02:00
|
|
|
onReasoningChunk,
|
|
|
|
|
onModel,
|
2025-09-17 19:29:13 +02:00
|
|
|
// Generation parameters
|
|
|
|
|
temperature,
|
|
|
|
|
max_tokens,
|
|
|
|
|
// Sampling parameters
|
|
|
|
|
dynatemp_range,
|
|
|
|
|
dynatemp_exponent,
|
|
|
|
|
top_k,
|
|
|
|
|
top_p,
|
|
|
|
|
min_p,
|
|
|
|
|
xtc_probability,
|
|
|
|
|
xtc_threshold,
|
|
|
|
|
typ_p,
|
|
|
|
|
// Penalty parameters
|
|
|
|
|
repeat_last_n,
|
|
|
|
|
repeat_penalty,
|
|
|
|
|
presence_penalty,
|
|
|
|
|
frequency_penalty,
|
|
|
|
|
dry_multiplier,
|
|
|
|
|
dry_base,
|
|
|
|
|
dry_allowed_length,
|
|
|
|
|
dry_penalty_last_n,
|
|
|
|
|
// Other parameters
|
|
|
|
|
samplers,
|
|
|
|
|
custom,
|
|
|
|
|
timings_per_token
|
|
|
|
|
} = options;
|
|
|
|
|
|
refactor: centralize CoT parsing in backend for streaming mode (#16394)
* refactor: unify reasoning handling via backend reasoning_content, drop frontend tag parsing
- Updated the chat message component to surface backend-supplied reasoning via message.thinking while showing the raw assistant content without inline tag scrubbing
- Simplified chat streaming to append content chunks directly, stream reasoning into the message model, and persist any partial reasoning when generation stops
- Refactored the chat service SSE handler to rely on server-provided reasoning_content, removing legacy <think> parsing logic
- Refreshed Storybook data and streaming flows to populate the thinking field explicitly for static and streaming assistant messages
* refactor: implement streaming-aware universal reasoning parser
Remove the streaming mode limitation from --reasoning-format by refactoring
try_parse_reasoning() to handle incremental parsing of <think> tags across
all formats.
- Rework try_parse_reasoning() to track whitespace, partial tags, and
multiple reasoning segments, allowing proper separation of reasoning_content
and content in streaming mode
- Parse reasoning tags before tool call handling in content-only and Llama 3.x
formats to ensure inline <think> blocks are captured correctly
- Change default reasoning_format from 'auto' to 'deepseek' for consistent
behavior
- Add 'deepseek-legacy' option to preserve old inline behavior when needed
- Update CLI help and documentation to reflect streaming support
- Add parser tests for inline <think>...</think> segments
The parser now continues processing content after </think> closes instead of
stopping, enabling proper message.reasoning_content and message.content
separation in both streaming and non-streaming modes.
Fixes the issue where streaming responses would dump everything (including
post-thinking content) into reasoning_content while leaving content empty.
* refactor: address review feedback from allozaur
- Passed the assistant message content directly to ChatMessageAssistant to drop the redundant derived state in the chat message component
- Simplified chat streaming updates by removing unused partial-thinking handling and persisting partial responses straight from currentResponse
- Refreshed the ChatMessage stories to cover standard and reasoning scenarios without the old THINK-tag parsing examples
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* refactor: restore forced reasoning prefix to pass test-chat ([chat] All tests passed)
- store the exact sequence seen on input when 'thinking_forced_open' enforces a reasoning block
- inject this prefix before the first accumulated segment in 'reasoning_content', then clear it to avoid duplication
- repeat the capture on every new 'start_think' detection to properly handle partial/streaming flows
* refactor: address review feedback from ngxson
* debug: say goodbye to curl -N, hello one-click raw stream
- adds a new checkbox in the WebUI to display raw LLM output without backend parsing or frontend Markdown rendering
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: add Storybook example for raw LLM output and scope reasoning format toggle per story
- Added a Storybook example that showcases the chat message component in raw LLM output mode with the provided trace sample
- Updated every ChatMessage story to toggle the disableReasoningFormat setting so the raw-output rendering remains scoped to its own example
* npm run format
* chat-parser: address review feedback from ngxson
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
2025-10-08 22:18:41 +02:00
|
|
|
const currentConfig = config();
|
|
|
|
|
|
2025-10-20 12:41:13 +02:00
|
|
|
const requestId = conversationId || 'default';
|
|
|
|
|
|
|
|
|
|
if (this.abortControllers.has(requestId)) {
|
|
|
|
|
this.abortControllers.get(requestId)?.abort();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const abortController = new AbortController();
|
|
|
|
|
this.abortControllers.set(requestId, abortController);
|
2025-09-17 19:29:13 +02:00
|
|
|
|
|
|
|
|
const normalizedMessages: ApiChatMessageData[] = messages
|
|
|
|
|
.map((msg) => {
|
|
|
|
|
if ('id' in msg && 'convId' in msg && 'timestamp' in msg) {
|
|
|
|
|
const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] };
|
|
|
|
|
return ChatService.convertMessageToChatServiceData(dbMsg);
|
|
|
|
|
} else {
|
|
|
|
|
return msg as ApiChatMessageData;
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
.filter((msg) => {
|
|
|
|
|
if (msg.role === 'system') {
|
|
|
|
|
const content = typeof msg.content === 'string' ? msg.content : '';
|
|
|
|
|
|
|
|
|
|
return content.trim().length > 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
const processedMessages = this.injectSystemMessage(normalizedMessages);
|
|
|
|
|
|
|
|
|
|
const requestBody: ApiChatCompletionRequest = {
|
|
|
|
|
messages: processedMessages.map((msg: ApiChatMessageData) => ({
|
|
|
|
|
role: msg.role,
|
|
|
|
|
content: msg.content
|
|
|
|
|
})),
|
|
|
|
|
stream
|
|
|
|
|
};
|
|
|
|
|
|
webui: introduce OpenAI-compatible model selector in JSON payload (#16562)
* webui: introduce OpenAI-compatible model selector in JSON payload
* webui: restore OpenAI-Compatible model source of truth and unify metadata capture
This change re-establishes a single, reliable source of truth for the active model:
fully aligned with the OpenAI-Compat API behavior
It introduces a unified metadata flow that captures the model field from both
streaming and non-streaming responses, wiring a new onModel callback through ChatService
The model name is now resolved directly from the API payload rather than relying on
server /props or UI assumptions
ChatStore records and persists the resolved model for each assistant message during
streaming, ensuring consistency across the UI and database
Type definitions for API and settings were also extended to include model metadata
and the onModel callback, completing the alignment with OpenAI-Compat semantics
* webui: address review feedback from allozaur
* webui: move model selector into ChatForm (idea by @allozaur)
* webui: make model selector more subtle and integrated into ChatForm
* webui: replaced the Flowbite selector with a native Svelte dropdown
* webui: add developer setting to toggle the chat model selector
* webui: address review feedback from allozaur
Normalized streamed model names during chat updates
by trimming input and removing directory components before saving
or persisting them, so the conversation UI shows only the filename
Forced model names within the chat form selector dropdown to render as
a single-line, truncated entry with a tooltip revealing the full name
* webui: toggle displayed model source for legacy vs OpenAI-Compat modes
When the selector is disabled, it falls back to the active server model name from /props
When the model selector is enabled, the displayed model comes from the message metadata
(the one explicitly selected and sent in the request)
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/constants/localstorage-keys.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refactor model selector and persistence helpers
- Replace inline portal and event listeners with proper Svelte bindings
- Introduce 'persisted' store helper for localStorage sync without runes
- Extract 'normalizeModelName' utils + Vitest coverage
- Simplify ChatFormModelSelector structure and cleanup logic
Replaced the persisted store helper's use of '$state/$effect' runes with
a plain TS implementation to prevent orphaned effect runtime errors
outside component context
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: document normalizeModelName usage with inline examples
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: extract ModelOption type into dedicated models.d.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refine ChatMessageAssistant displayedModel source logic
* webui: stabilize dropdown, simplify model extraction, and init assistant model field
* chore: update webui static build
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: npm format, update webui static build
* webui: align sidebar trigger position, remove z-index glitch
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
2025-10-22 16:58:23 +02:00
|
|
|
const modelSelectorEnabled = Boolean(currentConfig.modelSelectorEnabled);
|
|
|
|
|
const activeModel = modelSelectorEnabled ? selectedModelName() : null;
|
|
|
|
|
|
|
|
|
|
if (modelSelectorEnabled && activeModel) {
|
|
|
|
|
requestBody.model = activeModel;
|
|
|
|
|
}
|
|
|
|
|
|
refactor: centralize CoT parsing in backend for streaming mode (#16394)
* refactor: unify reasoning handling via backend reasoning_content, drop frontend tag parsing
- Updated the chat message component to surface backend-supplied reasoning via message.thinking while showing the raw assistant content without inline tag scrubbing
- Simplified chat streaming to append content chunks directly, stream reasoning into the message model, and persist any partial reasoning when generation stops
- Refactored the chat service SSE handler to rely on server-provided reasoning_content, removing legacy <think> parsing logic
- Refreshed Storybook data and streaming flows to populate the thinking field explicitly for static and streaming assistant messages
* refactor: implement streaming-aware universal reasoning parser
Remove the streaming mode limitation from --reasoning-format by refactoring
try_parse_reasoning() to handle incremental parsing of <think> tags across
all formats.
- Rework try_parse_reasoning() to track whitespace, partial tags, and
multiple reasoning segments, allowing proper separation of reasoning_content
and content in streaming mode
- Parse reasoning tags before tool call handling in content-only and Llama 3.x
formats to ensure inline <think> blocks are captured correctly
- Change default reasoning_format from 'auto' to 'deepseek' for consistent
behavior
- Add 'deepseek-legacy' option to preserve old inline behavior when needed
- Update CLI help and documentation to reflect streaming support
- Add parser tests for inline <think>...</think> segments
The parser now continues processing content after </think> closes instead of
stopping, enabling proper message.reasoning_content and message.content
separation in both streaming and non-streaming modes.
Fixes the issue where streaming responses would dump everything (including
post-thinking content) into reasoning_content while leaving content empty.
* refactor: address review feedback from allozaur
- Passed the assistant message content directly to ChatMessageAssistant to drop the redundant derived state in the chat message component
- Simplified chat streaming updates by removing unused partial-thinking handling and persisting partial responses straight from currentResponse
- Refreshed the ChatMessage stories to cover standard and reasoning scenarios without the old THINK-tag parsing examples
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* refactor: restore forced reasoning prefix to pass test-chat ([chat] All tests passed)
- store the exact sequence seen on input when 'thinking_forced_open' enforces a reasoning block
- inject this prefix before the first accumulated segment in 'reasoning_content', then clear it to avoid duplication
- repeat the capture on every new 'start_think' detection to properly handle partial/streaming flows
* refactor: address review feedback from ngxson
* debug: say goodbye to curl -N, hello one-click raw stream
- adds a new checkbox in the WebUI to display raw LLM output without backend parsing or frontend Markdown rendering
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: add Storybook example for raw LLM output and scope reasoning format toggle per story
- Added a Storybook example that showcases the chat message component in raw LLM output mode with the provided trace sample
- Updated every ChatMessage story to toggle the disableReasoningFormat setting so the raw-output rendering remains scoped to its own example
* npm run format
* chat-parser: address review feedback from ngxson
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
2025-10-08 22:18:41 +02:00
|
|
|
requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto';
|
2025-09-17 19:29:13 +02:00
|
|
|
|
|
|
|
|
if (temperature !== undefined) requestBody.temperature = temperature;
|
2025-10-09 22:54:57 +02:00
|
|
|
if (max_tokens !== undefined) {
|
|
|
|
|
// Set max_tokens to -1 (infinite) when explicitly configured as 0 or null
|
|
|
|
|
requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
|
|
|
|
|
}
|
2025-09-17 19:29:13 +02:00
|
|
|
|
|
|
|
|
if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
|
|
|
|
|
if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
|
|
|
|
|
if (top_k !== undefined) requestBody.top_k = top_k;
|
|
|
|
|
if (top_p !== undefined) requestBody.top_p = top_p;
|
|
|
|
|
if (min_p !== undefined) requestBody.min_p = min_p;
|
|
|
|
|
if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
|
|
|
|
|
if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
|
|
|
|
|
if (typ_p !== undefined) requestBody.typ_p = typ_p;
|
|
|
|
|
|
|
|
|
|
if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
|
|
|
|
|
if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
|
|
|
|
|
if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
|
|
|
|
|
if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
|
|
|
|
|
if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
|
|
|
|
|
if (dry_base !== undefined) requestBody.dry_base = dry_base;
|
|
|
|
|
if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
|
|
|
|
|
if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
|
|
|
|
|
|
|
|
|
|
if (samplers !== undefined) {
|
|
|
|
|
requestBody.samplers =
|
|
|
|
|
typeof samplers === 'string'
|
|
|
|
|
? samplers.split(';').filter((s: string) => s.trim())
|
|
|
|
|
: samplers;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
|
|
|
|
|
|
|
|
|
|
if (custom) {
|
|
|
|
|
try {
|
|
|
|
|
const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
|
|
|
|
|
Object.assign(requestBody, customParams);
|
|
|
|
|
} catch (error) {
|
|
|
|
|
console.warn('Failed to parse custom parameters:', error);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
const apiKey = currentConfig.apiKey?.toString().trim();
|
|
|
|
|
|
2025-09-26 11:36:48 -04:00
|
|
|
const response = await fetch(`./v1/chat/completions`, {
|
2025-09-17 19:29:13 +02:00
|
|
|
method: 'POST',
|
|
|
|
|
headers: {
|
|
|
|
|
'Content-Type': 'application/json',
|
|
|
|
|
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
|
|
|
|
|
},
|
|
|
|
|
body: JSON.stringify(requestBody),
|
2025-10-20 12:41:13 +02:00
|
|
|
signal: abortController.signal
|
2025-09-17 19:29:13 +02:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (!response.ok) {
|
|
|
|
|
const error = await this.parseErrorResponse(response);
|
|
|
|
|
if (onError) {
|
|
|
|
|
onError(error);
|
|
|
|
|
}
|
|
|
|
|
throw error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (stream) {
|
2025-10-20 12:41:13 +02:00
|
|
|
await this.handleStreamResponse(
|
2025-09-17 19:29:13 +02:00
|
|
|
response,
|
|
|
|
|
onChunk,
|
|
|
|
|
onComplete,
|
|
|
|
|
onError,
|
webui: introduce OpenAI-compatible model selector in JSON payload (#16562)
* webui: introduce OpenAI-compatible model selector in JSON payload
* webui: restore OpenAI-Compatible model source of truth and unify metadata capture
This change re-establishes a single, reliable source of truth for the active model:
fully aligned with the OpenAI-Compat API behavior
It introduces a unified metadata flow that captures the model field from both
streaming and non-streaming responses, wiring a new onModel callback through ChatService
The model name is now resolved directly from the API payload rather than relying on
server /props or UI assumptions
ChatStore records and persists the resolved model for each assistant message during
streaming, ensuring consistency across the UI and database
Type definitions for API and settings were also extended to include model metadata
and the onModel callback, completing the alignment with OpenAI-Compat semantics
* webui: address review feedback from allozaur
* webui: move model selector into ChatForm (idea by @allozaur)
* webui: make model selector more subtle and integrated into ChatForm
* webui: replaced the Flowbite selector with a native Svelte dropdown
* webui: add developer setting to toggle the chat model selector
* webui: address review feedback from allozaur
Normalized streamed model names during chat updates
by trimming input and removing directory components before saving
or persisting them, so the conversation UI shows only the filename
Forced model names within the chat form selector dropdown to render as
a single-line, truncated entry with a tooltip revealing the full name
* webui: toggle displayed model source for legacy vs OpenAI-Compat modes
When the selector is disabled, it falls back to the active server model name from /props
When the model selector is enabled, the displayed model comes from the message metadata
(the one explicitly selected and sent in the request)
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/constants/localstorage-keys.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refactor model selector and persistence helpers
- Replace inline portal and event listeners with proper Svelte bindings
- Introduce 'persisted' store helper for localStorage sync without runes
- Extract 'normalizeModelName' utils + Vitest coverage
- Simplify ChatFormModelSelector structure and cleanup logic
Replaced the persisted store helper's use of '$state/$effect' runes with
a plain TS implementation to prevent orphaned effect runtime errors
outside component context
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: document normalizeModelName usage with inline examples
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: extract ModelOption type into dedicated models.d.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refine ChatMessageAssistant displayedModel source logic
* webui: stabilize dropdown, simplify model extraction, and init assistant model field
* chore: update webui static build
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: npm format, update webui static build
* webui: align sidebar trigger position, remove z-index glitch
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
2025-10-22 16:58:23 +02:00
|
|
|
onReasoningChunk,
|
|
|
|
|
onModel,
|
2025-10-20 12:41:13 +02:00
|
|
|
conversationId,
|
|
|
|
|
abortController.signal
|
2025-09-17 19:29:13 +02:00
|
|
|
);
|
2025-10-20 12:41:13 +02:00
|
|
|
return;
|
2025-09-17 19:29:13 +02:00
|
|
|
} else {
|
webui: introduce OpenAI-compatible model selector in JSON payload (#16562)
* webui: introduce OpenAI-compatible model selector in JSON payload
* webui: restore OpenAI-Compatible model source of truth and unify metadata capture
This change re-establishes a single, reliable source of truth for the active model:
fully aligned with the OpenAI-Compat API behavior
It introduces a unified metadata flow that captures the model field from both
streaming and non-streaming responses, wiring a new onModel callback through ChatService
The model name is now resolved directly from the API payload rather than relying on
server /props or UI assumptions
ChatStore records and persists the resolved model for each assistant message during
streaming, ensuring consistency across the UI and database
Type definitions for API and settings were also extended to include model metadata
and the onModel callback, completing the alignment with OpenAI-Compat semantics
* webui: address review feedback from allozaur
* webui: move model selector into ChatForm (idea by @allozaur)
* webui: make model selector more subtle and integrated into ChatForm
* webui: replaced the Flowbite selector with a native Svelte dropdown
* webui: add developer setting to toggle the chat model selector
* webui: address review feedback from allozaur
Normalized streamed model names during chat updates
by trimming input and removing directory components before saving
or persisting them, so the conversation UI shows only the filename
Forced model names within the chat form selector dropdown to render as
a single-line, truncated entry with a tooltip revealing the full name
* webui: toggle displayed model source for legacy vs OpenAI-Compat modes
When the selector is disabled, it falls back to the active server model name from /props
When the model selector is enabled, the displayed model comes from the message metadata
(the one explicitly selected and sent in the request)
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/constants/localstorage-keys.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refactor model selector and persistence helpers
- Replace inline portal and event listeners with proper Svelte bindings
- Introduce 'persisted' store helper for localStorage sync without runes
- Extract 'normalizeModelName' utils + Vitest coverage
- Simplify ChatFormModelSelector structure and cleanup logic
Replaced the persisted store helper's use of '$state/$effect' runes with
a plain TS implementation to prevent orphaned effect runtime errors
outside component context
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: document normalizeModelName usage with inline examples
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: extract ModelOption type into dedicated models.d.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refine ChatMessageAssistant displayedModel source logic
* webui: stabilize dropdown, simplify model extraction, and init assistant model field
* chore: update webui static build
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: npm format, update webui static build
* webui: align sidebar trigger position, remove z-index glitch
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
2025-10-22 16:58:23 +02:00
|
|
|
return this.handleNonStreamResponse(response, onComplete, onError, onModel);
|
2025-09-17 19:29:13 +02:00
|
|
|
}
|
|
|
|
|
} catch (error) {
|
|
|
|
|
if (error instanceof Error && error.name === 'AbortError') {
|
|
|
|
|
console.log('Chat completion request was aborted');
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let userFriendlyError: Error;
|
|
|
|
|
|
|
|
|
|
if (error instanceof Error) {
|
|
|
|
|
if (error.name === 'TypeError' && error.message.includes('fetch')) {
|
|
|
|
|
userFriendlyError = new Error(
|
|
|
|
|
'Unable to connect to server - please check if the server is running'
|
|
|
|
|
);
|
2025-10-12 18:06:41 +02:00
|
|
|
userFriendlyError.name = 'NetworkError';
|
2025-09-17 19:29:13 +02:00
|
|
|
} else if (error.message.includes('ECONNREFUSED')) {
|
|
|
|
|
userFriendlyError = new Error('Connection refused - server may be offline');
|
2025-10-12 18:06:41 +02:00
|
|
|
userFriendlyError.name = 'NetworkError';
|
2025-09-17 19:29:13 +02:00
|
|
|
} else if (error.message.includes('ETIMEDOUT')) {
|
2025-10-12 18:06:41 +02:00
|
|
|
userFriendlyError = new Error('Request timed out - the server took too long to respond');
|
|
|
|
|
userFriendlyError.name = 'TimeoutError';
|
2025-09-17 19:29:13 +02:00
|
|
|
} else {
|
|
|
|
|
userFriendlyError = error;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
userFriendlyError = new Error('Unknown error occurred while sending message');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
console.error('Error in sendMessage:', error);
|
|
|
|
|
if (onError) {
|
|
|
|
|
onError(userFriendlyError);
|
|
|
|
|
}
|
|
|
|
|
throw userFriendlyError;
|
2025-10-20 12:41:13 +02:00
|
|
|
} finally {
|
|
|
|
|
this.abortControllers.delete(requestId);
|
2025-09-17 19:29:13 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2025-10-20 12:41:13 +02:00
|
|
|
* Handles streaming response from the chat completion API
|
|
|
|
|
* @param response - The Response object from the fetch request
|
2025-09-17 19:29:13 +02:00
|
|
|
* @param onChunk - Optional callback invoked for each content chunk received
|
|
|
|
|
* @param onComplete - Optional callback invoked when the stream is complete with full response
|
|
|
|
|
* @param onError - Optional callback invoked if an error occurs during streaming
|
|
|
|
|
* @param onReasoningChunk - Optional callback invoked for each reasoning content chunk
|
2025-10-20 12:41:13 +02:00
|
|
|
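* @param onModel - Optional callback invoked at most once, with the model name extracted from the first stream chunk that provides one
* @param conversationId - Optional conversation ID for per-conversation state tracking
* @param abortSignal - Optional abort signal checked while reading the stream; once aborted, the read loop stops processing further chunks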
|
2025-09-17 19:29:13 +02:00
|
|
|
* @returns {Promise<void>} Promise that resolves when streaming is complete
|
|
|
|
|
* @throws {Error} if the stream cannot be read or parsed
|
|
|
|
|
*/
|
|
|
|
|
private async handleStreamResponse(
|
|
|
|
|
response: Response,
|
|
|
|
|
onChunk?: (chunk: string) => void,
|
|
|
|
|
onComplete?: (
|
|
|
|
|
response: string,
|
|
|
|
|
reasoningContent?: string,
|
|
|
|
|
timings?: ChatMessageTimings
|
|
|
|
|
) => void,
|
|
|
|
|
onError?: (error: Error) => void,
|
2025-10-20 12:41:13 +02:00
|
|
|
onReasoningChunk?: (chunk: string) => void,
|
webui: introduce OpenAI-compatible model selector in JSON payload (#16562)
* webui: introduce OpenAI-compatible model selector in JSON payload
* webui: restore OpenAI-Compatible model source of truth and unify metadata capture
This change re-establishes a single, reliable source of truth for the active model:
fully aligned with the OpenAI-Compat API behavior
It introduces a unified metadata flow that captures the model field from both
streaming and non-streaming responses, wiring a new onModel callback through ChatService
The model name is now resolved directly from the API payload rather than relying on
server /props or UI assumptions
ChatStore records and persists the resolved model for each assistant message during
streaming, ensuring consistency across the UI and database
Type definitions for API and settings were also extended to include model metadata
and the onModel callback, completing the alignment with OpenAI-Compat semantics
* webui: address review feedback from allozaur
* webui: move model selector into ChatForm (idea by @allozaur)
* webui: make model selector more subtle and integrated into ChatForm
* webui: replaced the Flowbite selector with a native Svelte dropdown
* webui: add developer setting to toggle the chat model selector
* webui: address review feedback from allozaur
Normalized streamed model names during chat updates
by trimming input and removing directory components before saving
or persisting them, so the conversation UI shows only the filename
Forced model names within the chat form selector dropdown to render as
a single-line, truncated entry with a tooltip revealing the full name
* webui: toggle displayed model source for legacy vs OpenAI-Compat modes
When the selector is disabled, it falls back to the active server model name from /props
When the model selector is enabled, the displayed model comes from the message metadata
(the one explicitly selected and sent in the request)
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/constants/localstorage-keys.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refactor model selector and persistence helpers
- Replace inline portal and event listeners with proper Svelte bindings
- Introduce 'persisted' store helper for localStorage sync without runes
- Extract 'normalizeModelName' utils + Vitest coverage
- Simplify ChatFormModelSelector structure and cleanup logic
Replaced the persisted store helper's use of '$state/$effect' runes with
a plain TS implementation to prevent orphaned effect runtime errors
outside component context
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: document normalizeModelName usage with inline examples
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: extract ModelOption type into dedicated models.d.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refine ChatMessageAssistant displayedModel source logic
* webui: stabilize dropdown, simplify model extraction, and init assistant model field
* chore: update webui static build
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: npm format, update webui static build
* webui: align sidebar trigger position, remove z-index glitch
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
2025-10-22 16:58:23 +02:00
|
|
|
onModel?: (model: string) => void,
|
2025-10-20 12:41:13 +02:00
|
|
|
conversationId?: string,
|
|
|
|
|
abortSignal?: AbortSignal
|
2025-09-17 19:29:13 +02:00
|
|
|
): Promise<void> {
|
|
|
|
|
const reader = response.body?.getReader();
|
|
|
|
|
|
|
|
|
|
if (!reader) {
|
|
|
|
|
throw new Error('No response body');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const decoder = new TextDecoder();
|
refactor: centralize CoT parsing in backend for streaming mode (#16394)
* refactor: unify reasoning handling via backend reasoning_content, drop frontend tag parsing
- Updated the chat message component to surface backend-supplied reasoning via message.thinking while showing the raw assistant content without inline tag scrubbing
- Simplified chat streaming to append content chunks directly, stream reasoning into the message model, and persist any partial reasoning when generation stops
- Refactored the chat service SSE handler to rely on server-provided reasoning_content, removing legacy <think> parsing logic
- Refreshed Storybook data and streaming flows to populate the thinking field explicitly for static and streaming assistant messages
* refactor: implement streaming-aware universal reasoning parser
Remove the streaming mode limitation from --reasoning-format by refactoring
try_parse_reasoning() to handle incremental parsing of <think> tags across
all formats.
- Rework try_parse_reasoning() to track whitespace, partial tags, and
multiple reasoning segments, allowing proper separation of reasoning_content
and content in streaming mode
- Parse reasoning tags before tool call handling in content-only and Llama 3.x
formats to ensure inline <think> blocks are captured correctly
- Change default reasoning_format from 'auto' to 'deepseek' for consistent
behavior
- Add 'deepseek-legacy' option to preserve old inline behavior when needed
- Update CLI help and documentation to reflect streaming support
- Add parser tests for inline <think>...</think> segments
The parser now continues processing content after </think> closes instead of
stopping, enabling proper message.reasoning_content and message.content
separation in both streaming and non-streaming modes.
Fixes the issue where streaming responses would dump everything (including
post-thinking content) into reasoning_content while leaving content empty.
* refactor: address review feedback from allozaur
- Passed the assistant message content directly to ChatMessageAssistant to drop the redundant derived state in the chat message component
- Simplified chat streaming updates by removing unused partial-thinking handling and persisting partial responses straight from currentResponse
- Refreshed the ChatMessage stories to cover standard and reasoning scenarios without the old THINK-tag parsing examples
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* refactor: restore forced reasoning prefix to pass test-chat ([chat] All tests passed)
- store the exact sequence seen on input when 'thinking_forced_open' enforces a reasoning block
- inject this prefix before the first accumulated segment in 'reasoning_content', then clear it to avoid duplication
- repeat the capture on every new 'start_think' detection to properly handle partial/streaming flows
* refactor: address review feedback from ngxson
* debug: say goodbye to curl -N, hello one-click raw stream
- adds a new checkbox in the WebUI to display raw LLM output without backend parsing or frontend Markdown rendering
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: add Storybook example for raw LLM output and scope reasoning format toggle per story
- Added a Storybook example that showcases the chat message component in raw LLM output mode with the provided trace sample
- Updated every ChatMessage story to toggle the disableReasoningFormat setting so the raw-output rendering remains scoped to its own example
* npm run format
* chat-parser: address review feedback from ngxson
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
2025-10-08 22:18:41 +02:00
|
|
|
let aggregatedContent = '';
|
2025-09-17 19:29:13 +02:00
|
|
|
let fullReasoningContent = '';
|
|
|
|
|
let hasReceivedData = false;
|
|
|
|
|
let lastTimings: ChatMessageTimings | undefined;
|
2025-10-12 18:06:41 +02:00
|
|
|
let streamFinished = false;
|
webui: introduce OpenAI-compatible model selector in JSON payload (#16562)
* webui: introduce OpenAI-compatible model selector in JSON payload
* webui: restore OpenAI-Compatible model source of truth and unify metadata capture
This change re-establishes a single, reliable source of truth for the active model:
fully aligned with the OpenAI-Compat API behavior
It introduces a unified metadata flow that captures the model field from both
streaming and non-streaming responses, wiring a new onModel callback through ChatService
The model name is now resolved directly from the API payload rather than relying on
server /props or UI assumptions
ChatStore records and persists the resolved model for each assistant message during
streaming, ensuring consistency across the UI and database
Type definitions for API and settings were also extended to include model metadata
and the onModel callback, completing the alignment with OpenAI-Compat semantics
* webui: address review feedback from allozaur
* webui: move model selector into ChatForm (idea by @allozaur)
* webui: make model selector more subtle and integrated into ChatForm
* webui: replaced the Flowbite selector with a native Svelte dropdown
* webui: add developer setting to toggle the chat model selector
* webui: address review feedback from allozaur
Normalized streamed model names during chat updates
by trimming input and removing directory components before saving
or persisting them, so the conversation UI shows only the filename
Forced model names within the chat form selector dropdown to render as
a single-line, truncated entry with a tooltip revealing the full name
* webui: toggle displayed model source for legacy vs OpenAI-Compat modes
When the selector is disabled, it falls back to the active server model name from /props
When the model selector is enabled, the displayed model comes from the message metadata
(the one explicitly selected and sent in the request)
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/constants/localstorage-keys.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refactor model selector and persistence helpers
- Replace inline portal and event listeners with proper Svelte bindings
- Introduce 'persisted' store helper for localStorage sync without runes
- Extract 'normalizeModelName' utils + Vitest coverage
- Simplify ChatFormModelSelector structure and cleanup logic
Replaced the persisted store helper's use of '$state/$effect' runes with
a plain TS implementation to prevent orphaned effect runtime errors
outside component context
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: document normalizeModelName usage with inline examples
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: extract ModelOption type into dedicated models.d.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refine ChatMessageAssistant displayedModel source logic
* webui: stabilize dropdown, simplify model extraction, and init assistant model field
* chore: update webui static build
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: npm format, update webui static build
* webui: align sidebar trigger position, remove z-index glitch
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
2025-10-22 16:58:23 +02:00
|
|
|
let modelEmitted = false;
|
2025-09-17 19:29:13 +02:00
|
|
|
|
|
|
|
|
try {
|
2025-09-22 10:53:13 +02:00
|
|
|
let chunk = '';
|
2025-09-17 19:29:13 +02:00
|
|
|
while (true) {
|
2025-10-20 12:41:13 +02:00
|
|
|
if (abortSignal?.aborted) break;
|
|
|
|
|
|
2025-09-17 19:29:13 +02:00
|
|
|
const { done, value } = await reader.read();
|
|
|
|
|
if (done) break;
|
|
|
|
|
|
2025-10-20 12:41:13 +02:00
|
|
|
if (abortSignal?.aborted) break;
|
|
|
|
|
|
2025-09-22 10:53:13 +02:00
|
|
|
chunk += decoder.decode(value, { stream: true });
|
2025-09-17 19:29:13 +02:00
|
|
|
const lines = chunk.split('\n');
|
2025-10-20 12:41:13 +02:00
|
|
|
chunk = lines.pop() || '';
|
2025-09-17 19:29:13 +02:00
|
|
|
|
|
|
|
|
for (const line of lines) {
|
2025-10-20 12:41:13 +02:00
|
|
|
if (abortSignal?.aborted) break;
|
|
|
|
|
|
2025-09-17 19:29:13 +02:00
|
|
|
if (line.startsWith('data: ')) {
|
|
|
|
|
const data = line.slice(6);
|
|
|
|
|
if (data === '[DONE]') {
|
2025-10-12 18:06:41 +02:00
|
|
|
streamFinished = true;
|
|
|
|
|
continue;
|
2025-09-17 19:29:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
|
|
|
|
|
|
webui: introduce OpenAI-compatible model selector in JSON payload (#16562)
* webui: introduce OpenAI-compatible model selector in JSON payload
* webui: restore OpenAI-Compatible model source of truth and unify metadata capture
This change re-establishes a single, reliable source of truth for the active model:
fully aligned with the OpenAI-Compat API behavior
It introduces a unified metadata flow that captures the model field from both
streaming and non-streaming responses, wiring a new onModel callback through ChatService
The model name is now resolved directly from the API payload rather than relying on
server /props or UI assumptions
ChatStore records and persists the resolved model for each assistant message during
streaming, ensuring consistency across the UI and database
Type definitions for API and settings were also extended to include model metadata
and the onModel callback, completing the alignment with OpenAI-Compat semantics
* webui: address review feedback from allozaur
* webui: move model selector into ChatForm (idea by @allozaur)
* webui: make model selector more subtle and integrated into ChatForm
* webui: replaced the Flowbite selector with a native Svelte dropdown
* webui: add developer setting to toggle the chat model selector
* webui: address review feedback from allozaur
Normalized streamed model names during chat updates
by trimming input and removing directory components before saving
or persisting them, so the conversation UI shows only the filename
Forced model names within the chat form selector dropdown to render as
a single-line, truncated entry with a tooltip revealing the full name
* webui: toggle displayed model source for legacy vs OpenAI-Compat modes
When the selector is disabled, it falls back to the active server model name from /props
When the model selector is enabled, the displayed model comes from the message metadata
(the one explicitly selected and sent in the request)
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/constants/localstorage-keys.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refactor model selector and persistence helpers
- Replace inline portal and event listeners with proper Svelte bindings
- Introduce 'persisted' store helper for localStorage sync without runes
- Extract 'normalizeModelName' utils + Vitest coverage
- Simplify ChatFormModelSelector structure and cleanup logic
Replaced the persisted store helper's use of '$state/$effect' runes with
a plain TS implementation to prevent orphaned effect runtime errors
outside component context
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: document normalizeModelName usage with inline examples
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: extract ModelOption type into dedicated models.d.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refine ChatMessageAssistant displayedModel source logic
* webui: stabilize dropdown, simplify model extraction, and init assistant model field
* chore: update webui static build
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: npm format, update webui static build
* webui: align sidebar trigger position, remove z-index glitch
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
2025-10-22 16:58:23 +02:00
|
|
|
const chunkModel = this.extractModelName(parsed);
|
|
|
|
|
if (chunkModel && !modelEmitted) {
|
|
|
|
|
modelEmitted = true;
|
|
|
|
|
onModel?.(chunkModel);
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-17 19:29:13 +02:00
|
|
|
const content = parsed.choices[0]?.delta?.content;
|
|
|
|
|
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
|
|
|
|
|
const timings = parsed.timings;
|
|
|
|
|
const promptProgress = parsed.prompt_progress;
|
|
|
|
|
|
|
|
|
|
if (timings || promptProgress) {
|
2025-10-20 12:41:13 +02:00
|
|
|
this.updateProcessingState(timings, promptProgress, conversationId);
|
2025-09-17 19:29:13 +02:00
|
|
|
if (timings) {
|
|
|
|
|
lastTimings = timings;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (content) {
|
|
|
|
|
hasReceivedData = true;
|
refactor: centralize CoT parsing in backend for streaming mode (#16394)
* refactor: unify reasoning handling via backend reasoning_content, drop frontend tag parsing
- Updated the chat message component to surface backend-supplied reasoning via message.thinking while showing the raw assistant content without inline tag scrubbing
- Simplified chat streaming to append content chunks directly, stream reasoning into the message model, and persist any partial reasoning when generation stops
- Refactored the chat service SSE handler to rely on server-provided reasoning_content, removing legacy <think> parsing logic
- Refreshed Storybook data and streaming flows to populate the thinking field explicitly for static and streaming assistant messages
* refactor: implement streaming-aware universal reasoning parser
Remove the streaming mode limitation from --reasoning-format by refactoring
try_parse_reasoning() to handle incremental parsing of <think> tags across
all formats.
- Rework try_parse_reasoning() to track whitespace, partial tags, and
multiple reasoning segments, allowing proper separation of reasoning_content
and content in streaming mode
- Parse reasoning tags before tool call handling in content-only and Llama 3.x
formats to ensure inline <think> blocks are captured correctly
- Change default reasoning_format from 'auto' to 'deepseek' for consistent
behavior
- Add 'deepseek-legacy' option to preserve old inline behavior when needed
- Update CLI help and documentation to reflect streaming support
- Add parser tests for inline <think>...</think> segments
The parser now continues processing content after </think> closes instead of
stopping, enabling proper message.reasoning_content and message.content
separation in both streaming and non-streaming modes.
Fixes the issue where streaming responses would dump everything (including
post-thinking content) into reasoning_content while leaving content empty.
* refactor: address review feedback from allozaur
- Passed the assistant message content directly to ChatMessageAssistant to drop the redundant derived state in the chat message component
- Simplified chat streaming updates by removing unused partial-thinking handling and persisting partial responses straight from currentResponse
- Refreshed the ChatMessage stories to cover standard and reasoning scenarios without the old THINK-tag parsing examples
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* refactor: restore forced reasoning prefix to pass test-chat ([chat] All tests passed)
- store the exact sequence seen on input when 'thinking_forced_open' enforces a reasoning block
- inject this prefix before the first accumulated segment in 'reasoning_content', then clear it to avoid duplication
- repeat the capture on every new 'start_think' detection to properly handle partial/streaming flows
* refactor: address review feedback from ngxson
* debug: say goodbye to curl -N, hello one-click raw stream
- adds a new checkbox in the WebUI to display raw LLM output without backend parsing or frontend Markdown rendering
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: add Storybook example for raw LLM output and scope reasoning format toggle per story
- Added a Storybook example that showcases the chat message component in raw LLM output mode with the provided trace sample
- Updated every ChatMessage story to toggle the disableReasoningFormat setting so the raw-output rendering remains scoped to its own example
* npm run format
* chat-parser: address review feedback from ngxson
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
2025-10-08 22:18:41 +02:00
|
|
|
aggregatedContent += content;
|
2025-10-20 12:41:13 +02:00
|
|
|
if (!abortSignal?.aborted) {
|
|
|
|
|
onChunk?.(content);
|
|
|
|
|
}
|
2025-09-17 19:29:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (reasoningContent) {
|
|
|
|
|
hasReceivedData = true;
|
|
|
|
|
fullReasoningContent += reasoningContent;
|
2025-10-20 12:41:13 +02:00
|
|
|
if (!abortSignal?.aborted) {
|
|
|
|
|
onReasoningChunk?.(reasoningContent);
|
|
|
|
|
}
|
2025-09-17 19:29:13 +02:00
|
|
|
}
|
|
|
|
|
} catch (e) {
|
|
|
|
|
console.error('Error parsing JSON chunk:', e);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-10-20 12:41:13 +02:00
|
|
|
|
|
|
|
|
if (abortSignal?.aborted) break;
|
2025-09-17 19:29:13 +02:00
|
|
|
}
|
|
|
|
|
|
2025-10-20 12:41:13 +02:00
|
|
|
if (abortSignal?.aborted) return;
|
|
|
|
|
|
2025-10-12 18:06:41 +02:00
|
|
|
if (streamFinished) {
|
|
|
|
|
if (!hasReceivedData && aggregatedContent.length === 0) {
|
|
|
|
|
const noResponseError = new Error('No response received from server. Please try again.');
|
|
|
|
|
throw noResponseError;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
|
2025-09-17 19:29:13 +02:00
|
|
|
}
|
|
|
|
|
} catch (error) {
|
|
|
|
|
const err = error instanceof Error ? error : new Error('Stream error');
|
|
|
|
|
|
|
|
|
|
onError?.(err);
|
|
|
|
|
|
|
|
|
|
throw err;
|
|
|
|
|
} finally {
|
|
|
|
|
reader.releaseLock();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Handles a non-streaming response from the chat completion API.
 * Reads the body as text, parses it as JSON and extracts the generated content.
 *
 * @param response - The fetch Response object containing the JSON data
 * @param onComplete - Optional callback invoked with the content and (when present) the
 *   reasoning content once the response has been parsed; no timings are passed on this path
 * @param onError - Optional callback invoked with the error before it is rethrown
 * @param onModel - Optional callback invoked with the model name found in the response, if any
 * @returns Promise that resolves to the generated content string
 * @throws {Error} if the body is empty, is not valid JSON, or carries no usable content
 */
private async handleNonStreamResponse(
	response: Response,
	onComplete?: (
		response: string,
		reasoningContent?: string,
		timings?: ChatMessageTimings
	) => void,
	onError?: (error: Error) => void,
	onModel?: (model: string) => void
): Promise<string> {
	try {
		const responseText = await response.text();

		if (!responseText.trim()) {
			throw new Error('No response received from server. Please try again.');
		}

		const data: ApiChatCompletionResponse = JSON.parse(responseText);

		const responseModel = this.extractModelName(data);
		if (responseModel) {
			onModel?.(responseModel);
		}

		// JSON.parse output is unvalidated: tolerate a null payload or a missing/empty
		// `choices` array so a malformed body ends in the "No response" error below
		// instead of an opaque TypeError.
		const message = data?.choices?.[0]?.message;
		const rawContent = message?.content;
		const content = typeof rawContent === 'string' ? rawContent : '';
		const reasoningContent = message?.reasoning_content;

		if (!content.trim()) {
			throw new Error('No response received from server. Please try again.');
		}

		onComplete?.(content, reasoningContent);

		return content;
	} catch (error) {
		const err = error instanceof Error ? error : new Error('Parse error');

		// Notify the caller first, then propagate so awaiting code also sees the failure.
		onError?.(err);

		throw err;
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the API chat message for a database message, expanding its attachments.
 *
 * A message without attachments is returned with its plain string content. Otherwise the
 * content becomes a list of parts in this fixed order: the message text (if non-empty),
 * image files, text files, legacy 'context' entries, audio files, then PDF files.
 * A PDF flagged as processed-as-images contributes one image part per entry in its
 * `images` list; any other PDF contributes a single text part.
 *
 * @param message - Database message object with optional `extra` attachments
 * @returns Message data formatted for the chat completion API
 * @static
 */
static convertMessageToChatServiceData(
	message: DatabaseMessage & { extra?: DatabaseMessageExtra[] }
): ApiChatMessageData {
	const role = message.role as 'user' | 'assistant' | 'system';
	const attachments = message.extra;

	if (!attachments || attachments.length === 0) {
		return { role, content: message.content };
	}

	const parts: ApiChatMessageContentPart[] = [];

	if (message.content) {
		parts.push({ type: 'text', text: message.content });
	}

	attachments
		.filter(
			(item: DatabaseMessageExtra): item is DatabaseMessageExtraImageFile =>
				item.type === 'imageFile'
		)
		.forEach((imageFile) => {
			parts.push({ type: 'image_url', image_url: { url: imageFile.base64Url } });
		});

	attachments
		.filter(
			(item: DatabaseMessageExtra): item is DatabaseMessageExtraTextFile =>
				item.type === 'textFile'
		)
		.forEach((textFile) => {
			parts.push({
				type: 'text',
				text: `\n\n--- File: ${textFile.name} ---\n${textFile.content}`
			});
		});

	// Legacy 'context' type from the old webui (pasted content)
	attachments
		.filter(
			(item: DatabaseMessageExtra): item is DatabaseMessageExtraLegacyContext =>
				item.type === 'context'
		)
		.forEach((legacyContextFile) => {
			parts.push({
				type: 'text',
				text: `\n\n--- File: ${legacyContextFile.name} ---\n${legacyContextFile.content}`
			});
		});

	attachments
		.filter(
			(item: DatabaseMessageExtra): item is DatabaseMessageExtraAudioFile =>
				item.type === 'audioFile'
		)
		.forEach((audioFile) => {
			// Anything whose MIME type does not mention 'wav' is sent as 'mp3'.
			const format = audioFile.mimeType.includes('wav') ? 'wav' : 'mp3';
			parts.push({
				type: 'input_audio',
				input_audio: { data: audioFile.base64Data, format }
			});
		});

	attachments
		.filter(
			(item: DatabaseMessageExtra): item is DatabaseMessageExtraPdfFile =>
				item.type === 'pdfFile'
		)
		.forEach((pdfFile) => {
			if (pdfFile.processedAsImages && pdfFile.images) {
				for (const url of pdfFile.images) {
					parts.push({ type: 'image_url', image_url: { url } });
				}
				return;
			}

			parts.push({
				type: 'text',
				text: `\n\n--- PDF File: ${pdfFile.name} ---\n${pdfFile.content}`
			});
		});

	return { role, content: parts };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fetches the server properties from the relative `./props` endpoint.
 * Static method, kept for API compatibility.
 *
 * The configured API key, when non-empty after trimming, is sent as a Bearer token.
 *
 * @returns Promise resolving to the parsed JSON body of the response
 * @throws Rethrows any failure (including a non-OK status) after logging it
 */
static async getServerProps(): Promise<ApiLlamaCppServerProps> {
	try {
		const apiKey = config().apiKey?.toString().trim();

		const headers: Record<string, string> = { 'Content-Type': 'application/json' };
		if (apiKey) {
			headers.Authorization = `Bearer ${apiKey}`;
		}

		const response = await fetch(`./props`, { headers });

		if (response.ok) {
			// Awaited inside the try so a body-parsing failure is logged below as well.
			return await response.json();
		}

		throw new Error(`Failed to fetch server props: ${response.status}`);
	} catch (error) {
		console.error('Error fetching server props:', error);
		throw error;
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Aborts ongoing chat completion request(s).
 *
 * With a conversation ID, aborts and removes only the controller registered for that
 * conversation (a no-op if none is registered). Without one, or with an empty string,
 * aborts every registered controller and clears the registry.
 *
 * @param conversationId - Optional ID of the conversation whose request should be aborted
 * @public
 */
public abort(conversationId?: string): void {
	if (!conversationId) {
		for (const registered of this.abortControllers.values()) {
			registered.abort();
		}
		this.abortControllers.clear();
		return;
	}

	const target = this.abortControllers.get(conversationId);
	if (!target) {
		return;
	}

	target.abort();
	this.abortControllers.delete(conversationId);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Ensures the conversation starts with the system message configured in settings.
 *
 * - No configured system message (empty after trimming): the input array is returned as is.
 * - First message is already a system message: it is replaced (in a copy) when its content
 *   differs from the configured text; otherwise the input array is returned as is.
 * - Otherwise: a new array with the configured system message prepended is returned.
 *
 * The input array itself is never mutated.
 *
 * @param messages - Array of chat messages to process
 * @returns The original array, or a new array that begins with the configured system message
 * @private
 */
private injectSystemMessage(messages: ApiChatMessageData[]): ApiChatMessageData[] {
	const configuredText = config().systemMessage?.toString().trim();
	if (!configuredText) {
		return messages;
	}

	const startsWithSystem = messages.length > 0 && messages[0].role === 'system';

	if (!startsWithSystem) {
		const prepended: ApiChatMessageData = { role: 'system', content: configuredText };
		return [prepended, ...messages];
	}

	if (messages[0].content === configuredText) {
		return messages;
	}

	// Swap the stale leading system message for the configured one, keeping the rest.
	const replaced = messages.slice();
	replaced[0] = { role: 'system', content: configuredText };
	return replaced;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Turns a failed HTTP response into an Error.
 *
 * When the body is JSON, the error message is taken from `error.message` (falling back to
 * 'Unknown server error') and the error is named 'ServerError' for status 400 and
 * 'HttpError' for any other status. When the body cannot be read or parsed, a generic
 * 'HttpError' built from the status code and status text is returned instead.
 *
 * @param response - HTTP response object
 * @returns Promise resolving to the constructed Error (this method does not throw it)
 */
private async parseErrorResponse(response: Response): Promise<Error> {
	let result: Error;

	try {
		const body = await response.text();
		const payload: ApiErrorResponse = JSON.parse(body);

		result = new Error(payload.error?.message || 'Unknown server error');
		result.name = response.status === 400 ? 'ServerError' : 'HttpError';
	} catch {
		// Unreadable or non-JSON body: describe the failure from the HTTP status alone.
		result = new Error(`Server error (${response.status}): ${response.statusText}`);
		result.name = 'HttpError';
	}

	return result;
}
|
|
|
|
|
|
webui: introduce OpenAI-compatible model selector in JSON payload (#16562)
* webui: introduce OpenAI-compatible model selector in JSON payload
* webui: restore OpenAI-Compatible model source of truth and unify metadata capture
This change re-establishes a single, reliable source of truth for the active model:
fully aligned with the OpenAI-Compat API behavior
It introduces a unified metadata flow that captures the model field from both
streaming and non-streaming responses, wiring a new onModel callback through ChatService
The model name is now resolved directly from the API payload rather than relying on
server /props or UI assumptions
ChatStore records and persists the resolved model for each assistant message during
streaming, ensuring consistency across the UI and database
Type definitions for API and settings were also extended to include model metadata
and the onModel callback, completing the alignment with OpenAI-Compat semantics
* webui: address review feedback from allozaur
* webui: move model selector into ChatForm (idea by @allozaur)
* webui: make model selector more subtle and integrated into ChatForm
* webui: replaced the Flowbite selector with a native Svelte dropdown
* webui: add developer setting to toggle the chat model selector
* webui: address review feedback from allozaur
Normalized streamed model names during chat updates
by trimming input and removing directory components before saving
or persisting them, so the conversation UI shows only the filename
Forced model names within the chat form selector dropdown to render as
a single-line, truncated entry with a tooltip revealing the full name
* webui: toggle displayed model source for legacy vs OpenAI-Compat modes
When the selector is disabled, it falls back to the active server model name from /props
When the model selector is enabled, the displayed model comes from the message metadata
(the one explicitly selected and sent in the request)
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/constants/localstorage-keys.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/services/chat.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refactor model selector and persistence helpers
- Replace inline portal and event listeners with proper Svelte bindings
- Introduce 'persisted' store helper for localStorage sync without runes
- Extract 'normalizeModelName' utils + Vitest coverage
- Simplify ChatFormModelSelector structure and cleanup logic
Replaced the persisted store helper's use of '$state/$effect' runes with
a plain TS implementation to prevent orphaned effect runtime errors
outside component context
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: document normalizeModelName usage with inline examples
* Update tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/models.svelte.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: extract ModelOption type into dedicated models.d.ts
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* webui: refine ChatMessageAssistant displayedModel source logic
* webui: stabilize dropdown, simplify model extraction, and init assistant model field
* chore: update webui static build
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: npm format, update webui static build
* webui: align sidebar trigger position, remove z-index glitch
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
2025-10-22 16:58:23 +02:00
|
|
|
/**
 * Extracts a model name from a chat completion payload (streaming chunk or full response).
 *
 * Lookup order, returning the first non-empty trimmed string:
 *   1. `model` at the top level of the payload
 *   2. `choices[0].delta.model` (streaming chunk)
 *   3. `choices[0].message.model` (final response)
 *
 * No other locations are inspected, to avoid guessing from non-standard fields.
 *
 * @param data - Parsed payload of unknown shape
 * @returns The trimmed model name, or undefined when none is found
 */
private extractModelName(data: unknown): string | undefined {
	const isRecord = (candidate: unknown): candidate is Record<string, unknown> =>
		typeof candidate === 'object' && candidate !== null;

	const nonEmpty = (candidate: unknown): string | undefined => {
		if (typeof candidate !== 'string') return undefined;
		const trimmed = candidate.trim();
		return trimmed ? trimmed : undefined;
	};

	if (!isRecord(data)) return undefined;

	const topLevel = nonEmpty(data.model);
	if (topLevel) return topLevel;

	const choices = data.choices;
	const firstChoice = Array.isArray(choices) ? choices[0] : undefined;
	if (!isRecord(firstChoice)) return undefined;

	const { delta, message } = firstChoice;

	// nonEmpty never yields an empty string, so `??` matches a truthiness check here.
	return (
		nonEmpty(isRecord(delta) ? delta.model : undefined) ??
		nonEmpty(isRecord(message) ? message.model : undefined)
	);
}
|
|
|
|
|
|
2025-09-17 19:29:13 +02:00
|
|
|
/**
 * Forwards timing and prompt-progress data to the slots service.
 *
 * Computes the generation rate as `predicted_n / predicted_ms * 1000`, using 0 when either
 * value is missing or zero, and substitutes 0 for any missing token counter. A rejection
 * from the slots service is logged as a warning and otherwise ignored.
 *
 * @param timings - Optional timing data from the response
 * @param promptProgress - Optional prompt progress data, passed through unchanged
 * @param conversationId - Optional conversation ID, passed through unchanged
 */
private updateProcessingState(
	timings?: ChatMessageTimings,
	promptProgress?: ChatMessagePromptProgress,
	conversationId?: string
): void {
	let tokensPerSecond = 0;
	if (timings?.predicted_ms && timings?.predicted_n) {
		tokensPerSecond = (timings.predicted_n / timings.predicted_ms) * 1000;
	}

	const pending = slotsService.updateFromTimingData(
		{
			prompt_n: timings?.prompt_n || 0,
			predicted_n: timings?.predicted_n || 0,
			predicted_per_second: tokensPerSecond,
			cache_n: timings?.cache_n || 0,
			prompt_progress: promptProgress
		},
		conversationId
	);

	// Fire-and-forget: the update is not awaited, so a failure is only logged.
	pending.catch((error) => {
		console.warn('Failed to update processing state:', error);
	});
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Module-level ChatService instance, created once at import time and exported for other modules.
export const chatService = new ChatService();
|