From 112cad59b44793afe316b60276dcc350fbfc2c6a Mon Sep 17 00:00:00 2001
From: emozilla <emozilla@nousresearch.com>
Date: Tue, 12 May 2026 01:42:48 -0400
Subject: [PATCH] perf(desktop): memoize KaTeX renders so math streams without
 re-rendering

Wrap rehype-katex with a per-equation LRU cache (keyed by
displayMode + source text) and re-enable math during streaming.

Stock @streamdown/math runs rehype-katex on every markdown commit,
so each new token re-renders every equation in the message. For
math-heavy responses (an equation derived step-by-step) that's
hundreds of ms of wasted work per token and the streaming UI
chokes. With memoization, each distinct equation source pays the
katex.renderToString cost once while it stays in the cache;
subsequent tokens re-walk the tree but hit the cache for unchanged
equations.

The wrapper mirrors rehype-katex's semantics: same class detection
(language-math, math-inline, math-display), same <pre>-walk-up for
fenced math blocks, same parent.children.splice replacement, same
SKIP traversal, same strict-then-lenient render strategy. VFile
messages are reported when an equation is actually rendered; a
cache hit does not re-report them.

Cached children are structuredCloned on each splice so downstream
rehype plugins or toJsxRuntime can't mutate the cache.
---
 .../components/assistant-ui/markdown-text.tsx |  21 +-
 apps/desktop/src/lib/katex-memo.ts            | 270 ++++++++++++++++++
 2 files changed, 284 insertions(+), 7 deletions(-)
 create mode 100644 apps/desktop/src/lib/katex-memo.ts
diff --git a/apps/desktop/src/components/assistant-ui/markdown-text.tsx b/apps/desktop/src/components/assistant-ui/markdown-text.tsx
index e3b881473c..9b5f7ec649 100644
--- a/apps/desktop/src/components/assistant-ui/markdown-text.tsx
+++ b/apps/desktop/src/components/assistant-ui/markdown-text.tsx
@@ -7,7 +7,6 @@ import {
   type SyntaxHighlighterProps
 } from '@assistant-ui/react-streamdown'
 import { code } from '@streamdown/code'
-import { createMathPlugin } from '@streamdown/math'
 import { type ComponentProps, memo, useEffect, useMemo, useState } from 'react'
 
 import { PreviewAttachment } from '@/components/chat/preview-attachment'
@@ -15,6 +14,7 @@ import { SyntaxHighlighter } from '@/components/chat/shiki-highlighter'
 import { ZoomableImage } from '@/components/chat/zoomable-image'
 import { CopyButton } from '@/components/ui/copy-button'
 import { normalizeExternalUrl, openExternalLink, PrettyLink } from '@/lib/external-link'
+import { createMemoizedMathPlugin } from '@/lib/katex-memo'
 import { isLikelyProseCodeBlock, sanitizeLanguageTag } from '@/lib/markdown-code'
 import { preprocessMarkdown } from '@/lib/markdown-preprocess'
 import {
@@ -29,11 +29,18 @@ import { previewTargetFromMarkdownHref } from '@/lib/preview-targets'
 import { cn } from '@/lib/utils'
 
 // Math rendering plugin (KaTeX). Configured once at module scope — the
-// plugin is stateless so re-creating per-render is wasted work. Enable
-// `singleDollarTextMath` so models that emit `$x^2$` for inline math
-// (the de-facto convention in OpenAI / Anthropic outputs) render
-// correctly. The default false-setting only accepts `$$...$$` blocks.
-const mathPlugin = createMathPlugin({ singleDollarTextMath: true })
+// plugin keeps no per-instance state (its render cache is module-level in
+// lib/katex-memo.ts), so re-creating it per render is wasted work. It wraps
+// rehype-katex with memoization so that during streaming we re-render only
+// the equations whose source actually changed since the last token. With the
+// stock @streamdown/math plugin every equation re-renders on every token,
+// which throttles UI updates badly for math-heavy responses; the memoized
+// plugin keeps the steady-state work proportional to "new equations
+// arriving" rather than "equations × tokens-per-second".
+//
+// `singleDollarTextMath: true` enables `$x^2$` for inline math (de-facto
+// LLM convention). The default false-setting only accepts `$$...$$`.
+const mathPlugin = createMemoizedMathPlugin({ singleDollarTextMath: true })
 
 function CodeHeader({ language, code }: { language?: string; code?: string }) {
   const normalizedCode = (code ?? '').replace(/^\n+/, '').trimEnd()
@@ -329,7 +336,7 @@ const MarkdownTextImpl = () => {
       lineNumbers={false}
       mode="streaming"
       parseIncompleteMarkdown={!isStreaming}
-      plugins={isStreaming ? undefined : { code, math: mathPlugin }}
+      plugins={{ math: mathPlugin, ...(isStreaming ? {} : { code }) }}
       preprocess={preprocessMarkdown}
       shikiTheme={['github-light-default', 'github-dark-default']}
     />
diff --git a/apps/desktop/src/lib/katex-memo.ts b/apps/desktop/src/lib/katex-memo.ts
new file mode 100644
index 0000000000..2f7b07ffb2
--- /dev/null
+++ b/apps/desktop/src/lib/katex-memo.ts
@@ -0,0 +1,270 @@
+/**
+ * Memoizing wrapper around `rehype-katex`.
+ *
+ * Why: the default `@streamdown/math` plugin runs `rehype-katex` on every
+ * markdown commit. During streaming, that means each new token re-runs
+ * KaTeX on EVERY math node in the message — including equations that
+ * haven't changed since the last token. For math-heavy responses (a
+ * model deriving an equation step-by-step) this becomes a major source
+ * of jank: 20 unchanged equations each pay ~5–20ms of katex.renderToString
+ * work per token, adding up to hundreds of ms of CPU-bound work that
+ * delays the next streaming update.
+ *
+ * What this plugin does: walk the hast tree looking for the math nodes
+ * that `remark-math` emits (`<code class="math-inline">…</code>` for
+ * inline and `<pre><code class="math-display">…</code></pre>` for
+ * display), key them by `(displayMode, value)`, and serve them from an
+ * in-memory LRU cache when we've rendered the same equation before.
+ * Cache misses still go through `katex.renderToString`; cache hits
+ * return the previously generated hast subtree.
+ *
+ * Result: each unique equation pays the katex cost once while it stays
+ * cached. Adding one new equation to a paragraph re-renders just that one.
+ * The cache is module-level, shared across messages (e.g. re-rendering a
+ * session), and evicts least-recently-used entries past CACHE_LIMIT.
+ *
+ * Compatibility: the produced hast structure matches what `rehype-katex`
+ * itself produces — we use the same `hast-util-from-html-isomorphic`
+ * fragment parsing and the same parent-splice semantics, including the
+ * `<pre>`-walk-up for display mode. Drop-in replacement for the math
+ * slot in streamdown's PluginConfig.
+ *
+ * Wire it in via `createMemoizedMathPlugin`:
+ *
+ *   import { createMemoizedMathPlugin } from '@/lib/katex-memo'
+ *   const math = createMemoizedMathPlugin({ singleDollarTextMath: true })
+ *   <Streamdown plugins={{ math }} ... />
+ */
+
+import type { Element, ElementContent, Parent, Root } from 'hast'
+import { fromHtmlIsomorphic } from 'hast-util-from-html-isomorphic'
+import { toText } from 'hast-util-to-text'
+import katex from 'katex'
+import remarkMath from 'remark-math'
+import type { Pluggable } from 'unified'
+import { SKIP, visitParents } from 'unist-util-visit-parents'
+import type { VFile } from 'vfile'
+
+interface KatexMemoOptions {
+  /**
+   * Color for KaTeX errors on the lenient retry and the last-resort span.
+   * Defaults to `var(--color-muted-foreground)`, as `@streamdown/math` does.
+   */
+  errorColor?: string
+}
+
+interface MathPluginConfig {
+  /**
+   * Match `singleDollarTextMath` from `@streamdown/math`. When true the
+   * remark-math parser treats `$x$` as inline math; when false it requires
+   * `$$x$$`. Defaults to false here; models almost always emit the
+   * single-dollar form, so the call site in markdown-text.tsx passes true.
+   */
+  singleDollarTextMath?: boolean
+  errorColor?: string
+}
+
+/** Cached rendered hast — children to splice into the math node's parent. */
+type CachedRender = ElementContent[]
+
+const CACHE_LIMIT = 512
+
+class LruCache<K, V> {
+  private readonly map = new Map<K, V>() // insertion-ordered: head = LRU, tail = MRU
+
+  get(key: K): undefined | V {
+    const value = this.map.get(key)
+
+    if (value === undefined) {
+      return undefined
+    }
+
+    // Hit: delete + set moves the key to the tail. Map iteration order is
+    // insertion order, so the head is always the least-recently-used entry.
+    this.map.delete(key)
+    this.map.set(key, value)
+
+    return value
+  }
+
+  set(key: K, value: V): void {
+    if (this.map.has(key)) {
+      this.map.delete(key) // overwrite: re-inserted below so the key moves to the tail
+    } else if (this.map.size >= CACHE_LIMIT) {
+      const oldest = this.map.keys().next().value // first key = least recently used
+
+      if (oldest !== undefined) {
+        this.map.delete(oldest)
+      }
+    }
+
+    this.map.set(key, value)
+  }
+}
+
+const cache = new LruCache<string, CachedRender>()
+
+function cacheKey(displayMode: boolean, value: string): string {
+  // `\u0001` is a control character that won't appear in normal markdown.
+  // The one-character mode prefix ('d' / 'i') keeps the two modes apart.
+  return `${displayMode ? 'd' : 'i'}\u0001${value}`
+}
+
+/**
+ * Render one expression to hast. Two passes, as in `rehype-katex`: strict
+ * first (`throwOnError: true`); on failure, report a message on `file`
+ * positioned at `element`, then retry leniently (`strict: 'ignore'`,
+ * `throwOnError: false`) so KaTeX can paint the error in `errorColor`. If the
+ * retry throws too, returns a `katex-error` span holding the raw `value`.
+ */
+function renderMath(
+  value: string,
+  displayMode: boolean,
+  errorColor: string,
+  file: VFile,
+  element: Element
+): ElementContent[] {
+  let html: string
+
+  try {
+    html = katex.renderToString(value, { displayMode, throwOnError: true })
+  } catch (error) {
+    const cause = error as Error
+
+    file.message('Could not render math with KaTeX', {
+      cause,
+      place: element.position,
+      ruleId: cause.name?.toLowerCase() ?? 'katex',
+      source: 'rehype-katex-memo'
+    })
+
+    try {
+      html = katex.renderToString(value, {
+        displayMode,
+        errorColor,
+        strict: 'ignore',
+        throwOnError: false
+      })
+    } catch {
+      // Last-resort fallback: the lenient pass threw too. Show the raw source
+      // in a styled span; `title` carries the strict-pass `error` (this inner
+      // catch binds none). Mirrors rehype-katex's own escape hatch.
+      return [
+        {
+          type: 'element',
+          tagName: 'span',
+          properties: {
+            className: ['katex-error'],
+            style: `color:${errorColor}`,
+            title: String(error)
+          },
+          children: [{ type: 'text', value }]
+        }
+      ]
+    }
+  }
+
+  const fragment = fromHtmlIsomorphic(html, { fragment: true })
+
+  return fragment.children as ElementContent[]
+}
+
+/**
+ * The actual rehype plugin: `rehype-katex`'s visitor logic behind our LRU
+ * cache. A cache hit skips `renderMath` entirely, so the VFile message for
+ * a failing equation is only reported on cache misses.
+ */
+function createMemoizedRehypeKatex(options: KatexMemoOptions = {}): Pluggable {
+  const errorColor = options.errorColor ?? 'var(--color-muted-foreground)'
+
+  return () =>
+    function transform(tree: Root, file: VFile): undefined {
+      visitParents(tree, 'element', (element, parents) => {
+        const classes = Array.isArray(element.properties?.className)
+          ? (element.properties.className as string[])
+          : []
+
+        // Match the same class set rehype-katex looks for. `language-math`
+        // is the markdown ` ```math ` form, `math-inline` is what
+        // remark-math emits for `$x$`, `math-display` for `$$x$$`.
+        const languageMath = classes.includes('language-math')
+        const mathDisplay = classes.includes('math-display')
+        const mathInline = classes.includes('math-inline')
+
+        if (!(languageMath || mathDisplay || mathInline)) {
+          return
+        }
+
+        let displayMode = mathDisplay
+        let scope: Element = element
+        let parent: Parent | undefined = parents[parents.length - 1]
+
+        // For ` ```math ` the scope walks up to the wrapping <pre> and
+        // we treat it as display math. Same logic rehype-katex uses.
+        if (
+          languageMath &&
+          parent &&
+          parent.type === 'element' &&
+          (parent as Element).tagName === 'pre'
+        ) {
+          scope = parent as Element
+          parent = parents[parents.length - 2]
+          displayMode = true
+        }
+
+        // No parent means the math node is at the root — there's nothing
+        // to splice into, so bail. This shouldn't happen for properly
+        // nested markdown but is the same defensive guard rehype-katex has.
+        if (!parent) {
+          return
+        }
+
+        const value = toText(scope, { whitespace: 'pre' })
+        const key = cacheKey(displayMode, value)
+        let cached = cache.get(key)
+
+        if (!cached) {
+          cached = renderMath(value, displayMode, errorColor, file, scope)
+          cache.set(key, cached)
+        }
+
+        // Splice CLONES of the cached children into the parent. Reusing
+        // the same node instances across renders would let downstream
+        // rehype plugins or toJsxRuntime mutate the cached subtree —
+        // breaking the next cache hit. structuredClone is ~100µs per
+        // equation, well below the ~5–20ms katex.renderToString cost
+        // we're avoiding.
+        const clonedChildren = cached.map(child => structuredClone(child))
+        const index = parent.children.indexOf(scope as ElementContent)
+
+        if (index === -1) {
+          return
+        }
+
+        parent.children.splice(index, 1, ...clonedChildren)
+
+        return SKIP
+      })
+    }
+}
+
+/**
+ * Drop-in for `@streamdown/math`'s `createMathPlugin`, backed by the memoized
+ * rehype-katex wrapper. `singleDollarTextMath` defaults to `false` if omitted.
+ */
+export function createMemoizedMathPlugin(config: MathPluginConfig = {}) {
+  const remarkPlugin: Pluggable = [
+    remarkMath,
+    { singleDollarTextMath: config.singleDollarTextMath ?? false }
+  ]
+
+  const rehypePlugin = createMemoizedRehypeKatex({ errorColor: config.errorColor })
+
+  return {
+    name: 'katex' as const,
+    type: 'math' as const,
+    remarkPlugin,
+    rehypePlugin,
+    getStyles: () => 'katex/dist/katex.min.css'
+  }
+}