/**
 * Append a decoded SSE chunk to the running buffer and extract complete data lines.
 *
 * The combined text is split on `\n`. Every complete line is trimmed (which
 * also drops a trailing `\r` from CRLF streams) and, if it is a `data:` field,
 * its value is collected. The final, unterminated segment is handed back as
 * the new buffer so the caller can prepend it to the next chunk.
 *
 * The space after `data:` is optional in the SSE wire format, so both
 * `data: {...}` and `data:{...}` are accepted; at most one leading space is
 * removed from the value. Lines that are not `data:` fields, `data:` lines
 * with an empty value, and the `[DONE]` sentinel are skipped.
 *
 * Both the vLLM server-side reader and the client-side chat reader share
 * this logic — keeping buffer management in one place prevents drift when
 * the SSE wire format changes.
 *
 * @param buffer - Partial-line remainder returned by the previous call ("" on the first call).
 * @param chunk - Newly decoded text from the stream.
 * @returns The raw payload strings of each complete `data:` line, in order,
 *   and the updated buffer (partial-line remainder). Payloads are not parsed
 *   or validated here.
 */
export function consumeSSEChunk(
  buffer: string,
  chunk: string
): { jsonLines: string[]; buffer: string } {
  const lines = (buffer + chunk).split("\n");
  // split() always yields at least one element; the last one is the text
  // after the final newline, i.e. a line that is not complete yet.
  const remainder = lines.pop() ?? "";

  const jsonLines: string[] = [];
  for (const line of lines) {
    const trimmed = line.trim();
    if (!trimmed.startsWith("data:")) continue;

    // Drop the field name, then the single optional space that may follow the colon.
    const value = trimmed.slice("data:".length);
    const payload = value.startsWith(" ") ? value.slice(1) : value;

    if (payload === "" || payload === "[DONE]") continue;
    jsonLines.push(payload);
  }

  return { jsonLines, buffer: remainder };
}