Files
hermes-webui/tests/test_static_asset_compression_and_cache.py
T

273 lines
10 KiB
Python

"""Regression tests for static-asset compression + cache headers in _serve_static.
Pre-fix shape:
/static/* served raw bytes with `Cache-Control: no-store` and no
`Content-Encoding`. A page reload over a slow link re-downloaded the
full ~2.4 MB shell on every visit, even though every reference in
static/index.html and static/sw.js carries `?v=__WEBUI_VERSION__`
fingerprinting that already guarantees a fresh URL on redeploy.
Fix: _serve_static now negotiates gzip when the client opts in, emits
weak ETags for conditional GETs, and sends `max-age=31536000, immutable`
when the request URL carries a `?v=…` fingerprint (`max-age=300`
otherwise). Bytes + headers are cached in-process and invalidated on
(size, mtime) change so a redeploy is picked up without a restart.
These tests pin both halves — header policy AND the cache-invalidation
contract — so future refactors of _serve_static cannot silently
re-introduce no-store or break the gzip/304 path.
"""
import gzip
from types import SimpleNamespace
from urllib.parse import urlparse
class _FakeHandler:
    """In-memory double for an HTTP request handler.

    Captures the status code, response headers, and body bytes emitted by the
    code under test. Mirrors the stand-in used by
    tests/test_session_static_assets.py.
    """

    def __init__(self, request_headers=None):
        # Request side: a private dict copy of whatever the test supplied.
        self.headers = dict(request_headers) if request_headers else {}
        # Response side: filled in by send_response/send_header/write.
        self.status = None
        self.sent_headers = []
        self.body = bytearray()
        # The handler acts as its own output stream, so ``wfile.write`` lands
        # in ``self.body``.
        self.wfile = self

    def send_response(self, status):
        """Record the response status code."""
        self.status = status

    def send_header(self, name, value):
        """Append one ``(name, value)`` response header, preserving order."""
        self.sent_headers.append((name, value))

    def end_headers(self):
        """No-op; nothing is flushed in this double."""
        return None

    def write(self, data):
        """Accumulate response body bytes."""
        self.body.extend(data)

    def header(self, name):
        """Return the first sent header matching *name* case-insensitively, else None."""
        matches = (
            value
            for key, value in self.sent_headers
            if key.lower() == name.lower()
        )
        return next(matches, None)
def _make_static_file(static_root, name, content):
    """Write *content* to ``static_root / name`` and return the resulting path.

    ``bytes`` are written verbatim; any other value is encoded as UTF-8 first
    (so it is expected to be a ``str``).
    """
    if isinstance(content, bytes):
        raw = content
    else:
        raw = content.encode("utf-8")
    target = static_root / name
    target.write_bytes(raw)
    return target
def _serve(routes, path, query="", request_headers=None):
    """Run ``routes._serve_static`` against a fake handler and return the handler.

    The URL is assembled as text and parsed with ``urlparse`` so the function
    under test receives the same parsed-URL object shape a real request yields.
    """
    suffix = ("?" + query) if query else ""
    parsed_url = urlparse(f"http://x{path}{suffix}")
    handler = _FakeHandler(request_headers)
    routes._serve_static(handler, parsed_url)
    return handler
def _patch_static_root(monkeypatch, static_root):
    """Rebind ``routes._serve_static`` to a pass-through wrapper.

    NOTE: despite its name, this helper neither redirects the static root nor
    clears any cache. The wrapper it installs forwards every call to the
    original ``_serve_static`` unchanged. ``static_root`` is accepted only to
    keep the signature stable and is not used.

    Tests that need an isolated static tree should use the ``isolated_static``
    fixture, which rebinds ``routes.__file__`` around each call.
    """
    from api import routes

    original = routes._serve_static

    def passthrough(handler, parsed):
        return original(handler, parsed)

    monkeypatch.setattr(routes, "_serve_static", passthrough)
# ── Fixture: build a tiny isolated static tree and rebind paths ───────────
import pytest
@pytest.fixture
def isolated_static(tmp_path, monkeypatch):
    """Serve ``/static/*`` from an isolated temporary tree.

    Creates ``<tmp_path>/static`` and a stub ``<tmp_path>/api/routes.py``,
    replaces ``routes._STATIC_CACHE`` with an empty dict so cached state cannot
    leak between tests, and wraps ``routes._serve_static`` so that
    ``routes.__file__`` points at the stub for the duration of each call.

    Yields:
        The ``static`` directory ``Path``; tests drop files into it.
    """
    from api import routes

    static_root = tmp_path / "static"
    static_root.mkdir()

    # _serve_static is understood to derive its root from
    # Path(__file__).parent.parent / "static". A stub api/routes.py placed
    # beside the temp static dir makes that relative walk land in static_root.
    # It is created once here rather than re-checked on every served request.
    fake_api_dir = tmp_path / "api"
    fake_api_dir.mkdir(exist_ok=True)
    fake_routes = fake_api_dir / "routes.py"
    fake_routes.write_text("# stub for path resolution\n", encoding="utf-8")

    # Fresh cache so cross-test state cannot leak.
    monkeypatch.setattr(routes, "_STATIC_CACHE", {}, raising=True)

    original = routes._serve_static

    def wrapped(handler, parsed):
        # Scope the __file__ override to this single call; the nested context
        # undoes it on exit (including on exceptions) without growing the
        # fixture-wide monkeypatch undo stack.
        with monkeypatch.context() as scoped:
            scoped.setattr(routes, "__file__", str(fake_routes))
            return original(handler, parsed)

    monkeypatch.setattr(routes, "_serve_static", wrapped)
    yield static_root
# ── Tests ─────────────────────────────────────────────────────────────────
def test_plain_get_returns_raw_bytes_with_etag(isolated_static):
    """A request without Accept-Encoding gets the raw bytes plus a weak ETag."""
    from api import routes

    # Comfortably above 1 KB, so the file would be gzip-eligible if negotiated.
    script = b"console.log('hello');\n" * 200
    _make_static_file(isolated_static, "ui.js", script)

    response = _serve(routes, "/static/ui.js")

    assert response.status == 200
    assert response.header("Content-Type") == "application/javascript; charset=utf-8"
    # No gzip without an Accept-Encoding request header.
    assert response.header("Content-Encoding") is None
    etag = response.header("ETag")
    assert etag is not None
    assert etag.startswith('W/"')
    # No ?v= fingerprint on the URL, so the short-lived policy applies.
    assert response.header("Cache-Control") == "public, max-age=300"
    assert bytes(response.body) == script
def test_gzip_negotiated_when_client_accepts(isolated_static):
    """A client advertising gzip receives a compressed, round-trippable body."""
    from api import routes

    original_bytes = b"a" * 50_000
    _make_static_file(isolated_static, "ui.js", original_bytes)

    response = _serve(
        routes,
        "/static/ui.js",
        request_headers={"Accept-Encoding": "gzip, deflate"},
    )

    assert response.status == 200
    assert response.header("Content-Encoding") == "gzip"
    assert response.header("Vary") == "Accept-Encoding"
    assert gzip.decompress(bytes(response.body)) == original_bytes
    # Content-Length must describe the compressed body, which must be smaller
    # than the source file.
    sent_length = len(response.body)
    assert int(response.header("Content-Length")) == sent_length
    assert sent_length < len(original_bytes)
def test_fingerprinted_url_gets_immutable_cache(isolated_static):
    """A ``?v=<token>`` URL is served with the year-long immutable policy."""
    from api import routes

    _make_static_file(isolated_static, "ui.js", b"x" * 2000)

    response = _serve(routes, "/static/ui.js", query="v=abc1234")

    expected_policy = "public, max-age=31536000, immutable"
    assert response.header("Cache-Control") == expected_policy
def test_empty_fingerprint_value_gets_short_cache(isolated_static):
    """An empty ``v=`` value does not count as a fingerprint."""
    from api import routes

    _make_static_file(isolated_static, "ui.js", b"x" * 2000)

    response = _serve(routes, "/static/ui.js", query="v=")

    expected_policy = "public, max-age=300"
    assert response.header("Cache-Control") == expected_policy
def test_unfingerprinted_url_gets_short_cache(isolated_static):
    """A URL with no query string gets the short-lived cache policy."""
    from api import routes

    _make_static_file(isolated_static, "ui.js", b"x" * 2000)

    response = _serve(routes, "/static/ui.js")

    expected_policy = "public, max-age=300"
    assert response.header("Cache-Control") == expected_policy
def test_conditional_get_returns_304(isolated_static):
    """Replaying the ETag via If-None-Match yields a bodiless 304 with full headers."""
    from api import routes

    _make_static_file(isolated_static, "ui.js", b"hello world\n" * 100)

    # Prime: fetch once to learn the current ETag.
    initial = _serve(routes, "/static/ui.js", query="v=abc")
    etag = initial.header("ETag")
    assert etag is not None

    # Revalidate: same URL, presenting the ETag we were just given.
    revalidated = _serve(
        routes,
        "/static/ui.js",
        query="v=abc",
        request_headers={"If-None-Match": etag},
    )

    assert revalidated.status == 304
    assert revalidated.header("ETag") == etag
    assert revalidated.header("Cache-Control") == "public, max-age=31536000, immutable"
    assert revalidated.header("Vary") == "Accept-Encoding"
    assert bytes(revalidated.body) == b""
def test_etag_changes_when_file_changes(isolated_static):
    """Cache must invalidate when (size, mtime) changes — guards redeploy correctness."""
    import os

    from api import routes

    f = _make_static_file(isolated_static, "ui.js", b"v1" * 1000)
    first = _serve(routes, "/static/ui.js")
    etag_v1 = first.header("ETag")

    # Rewrite the file, then push its mtime two whole seconds past the
    # original explicitly instead of sleeping. A multi-second jump stays
    # visible even if the ETag/cache signature only has 1 s resolution, and the
    # test no longer depends on wall-clock timing.
    mtime_before_ns = f.stat().st_mtime_ns
    f.write_bytes(b"v2-different-content" * 50)
    mtime_after_ns = mtime_before_ns + 2_000_000_000
    os.utime(f, ns=(mtime_after_ns, mtime_after_ns))

    second = _serve(routes, "/static/ui.js")
    etag_v2 = second.header("ETag")
    assert etag_v1 != etag_v2

    # Old ETag now produces a 200, not a stale 304.
    third = _serve(routes, "/static/ui.js", request_headers={"If-None-Match": etag_v1})
    assert third.status == 200
def test_etag_changes_for_same_size_edits_within_same_second(isolated_static):
    """Same-size edits 123 ms apart must still produce a new ETag and new body."""
    import os

    from api import routes

    target = _make_static_file(isolated_static, "ui.js", b"a" * 2048)

    # Pin the first version to an exact whole second.
    base_ns = 1_900_000_000 * 1_000_000_000
    os.utime(target, ns=(base_ns, base_ns))
    before = _serve(routes, "/static/ui.js")
    etag_v1 = before.header("ETag")

    # Same length, different bytes, mtime moved forward within the same second.
    target.write_bytes(b"b" * 2048)
    shifted_ns = base_ns + 123_000_000
    os.utime(target, ns=(shifted_ns, shifted_ns))

    after = _serve(routes, "/static/ui.js")
    assert after.header("ETag") != etag_v1
    assert bytes(after.body) == b"b" * 2048
def test_image_is_not_gzipped(isolated_static):
    """PNG responses are sent without Content-Encoding even when gzip is accepted."""
    from api import routes

    # About 4 KB of pseudo-PNG: a PNG signature followed by zero padding.
    fake_png = b"\x89PNG\r\n\x1a\n" + b"\x00" * 4000
    _make_static_file(isolated_static, "favicon.png", fake_png)

    response = _serve(
        routes,
        "/static/favicon.png",
        request_headers={"Accept-Encoding": "gzip"},
    )

    assert response.status == 200
    assert response.header("Content-Encoding") is None
    assert response.header("Content-Type") == "image/png"
def test_tiny_file_is_not_gzipped(isolated_static):
    """A very small file is served uncompressed even when the client accepts gzip."""
    from api import routes

    _make_static_file(isolated_static, "tiny.js", b"export {};\n")

    response = _serve(
        routes,
        "/static/tiny.js",
        request_headers={"Accept-Encoding": "gzip"},
    )

    assert response.status == 200
    assert response.header("Content-Encoding") is None
def test_path_traversal_still_rejected(isolated_static):
    """A ``..`` segment that escapes static/ must yield 404."""
    from api import routes

    _make_static_file(isolated_static, "ui.js", b"ok")

    # Attempt to climb out of static/ into the sibling api/ directory; the
    # request must be refused rather than serving a file outside the root.
    escape_path = "/static/../api/routes.py"
    response = _serve(routes, escape_path)

    assert response.status == 404