From 3f22902423a6231df0c356cc0cddb6a5a20ec38b Mon Sep 17 00:00:00 2001 From: AJV20 <24819659+AJV20@users.noreply.github.com> Date: Thu, 28 May 2026 08:40:51 -0400 Subject: [PATCH] fix: forward gateway image attachments --- CHANGELOG.md | 4 ++ api/gateway_chat.py | 11 ++++- tests/test_webui_gateway_chat_backend.py | 55 ++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a990d204..293228bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ ## [Unreleased] +### Fixed + +- Gateway-backed WebUI chat now forwards current-turn image attachments as OpenAI-style multimodal `image_url` parts when native image input is enabled, matching the legacy WebUI runtime's image handoff. + ## [v0.51.152] — 2026-05-28 — Release DX (stage-batch34 — single-PR optional gateway-backed browser chat) ### Added diff --git a/api/gateway_chat.py b/api/gateway_chat.py index 1c658abb..69e653ac 100644 --- a/api/gateway_chat.py +++ b/api/gateway_chat.py @@ -202,10 +202,19 @@ def _run_gateway_chat_streaming( # Scope Gateway long-term continuity to this WebUI conversation # without exposing the browser's auth cookie or CSRF material. headers["X-Hermes-Session-Key"] = f"webui:{session_id}" + message_content: Any = str(msg_text or "") + if attachments: + try: + from api.streaming import _build_native_multimodal_message + + message_content = _build_native_multimodal_message("", str(msg_text or ""), attachments, str(workspace), cfg=cfg) + except Exception: + logger.debug("Failed to build gateway multimodal attachment payload", exc_info=True) + message_content = str(msg_text or "") body = { "model": model or "default", "stream": True, - "messages": [{"role": "user", "content": str(msg_text or "")}], + "messages": [{"role": "user", "content": message_content}], } if model_provider: body["provider"] = model_provider diff --git a/tests/test_webui_gateway_chat_backend.py b/tests/test_webui_gateway_chat_backend.py index e2bed326..f9c7f94d 100644 --- a/tests/test_webui_gateway_chat_backend.py +++ b/tests/test_webui_gateway_chat_backend.py @@ -1,4 +1,6 @@ from collections import OrderedDict +import base64 +import json import api.gateway_chat as gateway_chat import api.models as models @@ -117,3 +119,56 @@ def test_gateway_chat_worker_translates_sse_and_persists_session(tmp_path, monke assert captured["headers"]["X-hermes-session-id"] == s.session_id assert captured["headers"]["X-hermes-session-key"] == f"webui:{s.session_id}" assert '"stream": true' in captured["body"] + + +def test_gateway_chat_worker_forwards_image_attachments_as_multimodal_parts(tmp_path, monkeypatch): + session_dir = tmp_path / "sessions" + session_dir.mkdir() + monkeypatch.setattr(models, "SESSION_DIR", session_dir) + monkeypatch.setattr(models, "SESSION_INDEX_FILE", session_dir / "_index.json") + monkeypatch.setattr(models, "SESSIONS", OrderedDict()) + + image_bytes = base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+/p9sAAAAASUVORK5CYII=" + ) + image_path = tmp_path / "photo.png" + image_path.write_bytes(image_bytes) + captured = {} + + class FakeResponse: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def __iter__(self): + yield b'data: {"choices":[{"delta":{"content":"saw it"}}]}\n\n' + yield b'data: [DONE]\n\n' + + def fake_urlopen(req, timeout=0): + captured["body"] = json.loads(req.data.decode("utf-8")) + return FakeResponse() + + monkeypatch.setenv("HERMES_WEBUI_GATEWAY_BASE_URL", "http://gateway.local") + monkeypatch.setattr(gateway_chat.urllib.request, "urlopen", fake_urlopen) + + s = new_session() + stream_id = "stream-gateway-image-test" + s.active_stream_id = stream_id + s.save() + STREAMS[stream_id] = create_stream_channel() + + gateway_chat._run_gateway_chat_streaming( + s.session_id, + "What is in this image?", + "test-model", + str(tmp_path), + stream_id, + [{"path": str(image_path), "mime": "image/png", "is_image": True}], + ) + + content = captured["body"]["messages"][0]["content"] + assert content[0] == {"type": "text", "text": "What is in this image?"} + assert content[1]["type"] == "image_url" + assert content[1]["image_url"]["url"].startswith("data:image/png;base64,")