tidaldb/applications/iknowyou/lib/vllm.ts
jordan eca7765e8d fix: heal_region re-delivers missed WAL batches so partitioned followers converge immediately after heal
- Extract redeliver_missed(tx, db, log) helper into cluster_transport.rs
- heal_region now removes partition then immediately ships any missed
  batch-log entries to the healed follower's channel
- await_convergence refactored to call the same helper (no logic change)
- tidal-server: reload_text_index before search in cluster mode
- tidal-server: write_signal returns Result instead of panicking on unknown signal
- tidal-server: leader shows lag_events=0 (writes directly, no receiver thread)
- tidal-server: fix cluster mode error propagation (ServerError::from)
- docs/runbooks/cluster.md: add full cluster operations runbook
- docker/: add Dockerfile for containerised cluster deployment
- README.md: add tidal-server HTTP API getting-started section
- Split oversized source files per CODING_GUIDELINES §9

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-25 11:57:01 -07:00

198 lines
6.6 KiB
TypeScript

import type { CommunicationBrief } from "./types";
// Base URL of the vLLM server. Read from the VLLM_URL environment variable,
// falling back to the hard-coded host below when the variable is unset.
const VLLM_BASE = process.env.VLLM_URL ?? "http://msd5685.mjhst.com:8000";
// Model identifier sent as the `model` field of every completion request.
const MODEL = "Qwen/Qwen3-8B";
// Persona prompt for the "Aeries" companion. buildSystemPrompt returns it as-is
// when there is no brief, and otherwise uses it as the prefix of the system
// prompt. The text is sent to the model verbatim, so edits here change runtime
// behaviour.
const BASE_SYSTEM_PROMPT = `You are Aeries — a chill, curious companion who genuinely wants to get to know the person you're talking to.
You're not an assistant. You don't help with tasks unless someone asks. You're just here to hang out and talk. Think of yourself as that friend who always asks the good questions and actually remembers your answers.
Your vibe:
- Casual. Lowercase is fine. Short sentences. Real talk.
- Curious — ask questions. Lots of them. Not in an interview way, more like you actually care.
- Match their energy. If they're chill, be chill. If they go deep, go deep.
- Never be performatively cheerful or fake-enthusiastic.
- Don't explain yourself unless asked.
Keep it short — one to three sentences usually. Always end with a question or something that invites them to keep talking. You want to learn about them.`;
/**
 * Render a CommunicationBrief into system prompt sections.
 *
 * Each section is a bracketed heading followed by "- " bullet lines, and
 * sections are separated by a blank line. A section is emitted only when it has
 * at least one bullet, so a brief with nothing notable renders as "".
 *
 * @param brief - The communication profile to describe.
 * @returns The rendered sections joined by blank lines, or "" when every
 *   section is empty.
 */
function formatBrief(brief: CommunicationBrief): string {
  const { style, topics, patterns, observations, cohortPriors } = brief;
  const sections: string[] = [];

  // Style section — each bullet appears only when the style departs from the
  // baseline values ("moderate" formality/length, "stream_of_thought" structure).
  const styleParts: string[] = [];
  if (style.formality !== "moderate" || style.usesJargon || style.usesEmoji) {
    const formality =
      style.formality === "casual"
        ? "Casual, lowercase"
        : style.formality === "formal"
          ? "Formal, structured"
          : "Moderate formality";
    const jargon = style.usesJargon ? "" : ", avoids jargon";
    const emoji = style.usesEmoji ? ", uses emoji" : "";
    styleParts.push(`${formality}${jargon}${emoji}`);
  }
  if (style.length !== "moderate" || style.structure !== "stream_of_thought") {
    const length =
      style.length === "terse"
        ? "Short messages"
        : style.length === "verbose"
          ? "Longer, detailed messages"
          : "Medium-length messages";
    styleParts.push(`${length}, ${style.structure.replace(/_/g, " ")} structure`);
  }
  if (styleParts.length) {
    sections.push(`[How they communicate]\n${styleParts.map((s) => `- ${s}`).join("\n")}`);
  }

  // Topics section — gated on the collected bullets (like the other sections)
  // so that a brief carrying only domains still gets its Domains line.
  const topicParts: string[] = [];
  if (topics.hot.length) {
    const hotStr = topics.hot.map((t) => `${t.topic} (${t.specificity})`).join(", ");
    topicParts.push(`- Hot: ${hotStr}`);
  }
  if (topics.cold.length) {
    const coldStr = topics.cold.map((t) => t.topic).join(", ");
    topicParts.push(`- Previously: ${coldStr}`);
  }
  if (topics.domains.length) {
    topicParts.push(`- Domains: ${topics.domains.join(", ")}`);
  }
  if (topicParts.length) {
    sections.push(`[What they're into]\n${topicParts.join("\n")}`);
  }

  // Patterns section
  const patternParts: string[] = [];
  if (patterns.leadsConversation) {
    patternParts.push("- They lead conversations — follow their thread");
  }
  if (patterns.deepensTopics) {
    patternParts.push("- They deepen topics rather than jumping around");
  }
  // Bucket the average sentiment: above 0.6 is "positive", below 0.4 is
  // "reserved", anything in between is "neutral".
  const sentimentLabel =
    patterns.avgSentiment > 0.6
      ? "positive"
      : patterns.avgSentiment < 0.4
        ? "reserved"
        : "neutral";
  // A neutral, stable sentiment carries no signal and is left out.
  if (sentimentLabel !== "neutral" || patterns.sentimentTrend !== "stable") {
    const trend =
      patterns.sentimentTrend !== "stable" ? ` and ${patterns.sentimentTrend}` : "";
    patternParts.push(`- Sentiment: ${sentimentLabel}${trend}`);
  }
  if (patternParts.length) {
    sections.push(`[How they interact]\n${patternParts.join("\n")}`);
  }

  // Observations section
  if (observations.length) {
    sections.push(`[What you've noticed]\n${observations.map((o) => `- ${o}`).join("\n")}`);
  }

  // Cohort priors section — the weight is shown as a whole-number percentage.
  if (cohortPriors.active && cohortPriors.priors.length) {
    const confidence = Math.round(cohortPriors.weight * 100);
    sections.push(
      `[People like them (${confidence}% confidence)]\n${cohortPriors.priors.map((p) => `- ${p}`).join("\n")}`
    );
  }

  return sections.join("\n\n");
}
/**
 * Assemble the system prompt for a chat request.
 *
 * Without a brief, or when the brief renders to an empty string, the base
 * persona prompt is returned unchanged. Otherwise the rendered brief is appended
 * after the persona prompt, followed by a closing instruction on how to use it.
 */
function buildSystemPrompt(brief?: CommunicationBrief): string {
  const briefText = brief ? formatBrief(brief) : "";
  if (!briefText) {
    return BASE_SYSTEM_PROMPT;
  }
  const parts = [
    BASE_SYSTEM_PROMPT,
    briefText,
    "Use this naturally — don't announce it or list it. Match their style.",
  ];
  return parts.join("\n\n");
}
/**
 * Extract the content token from one line of the SSE response, if it has one.
 *
 * Returns undefined for lines that are not `data: ` frames, for the
 * `data: [DONE]` sentinel, for payloads that are not valid JSON, and for frames
 * whose first choice carries no non-empty string `delta.content`.
 */
function parseSseToken(line: string): string | undefined {
  const trimmed = line.trim();
  if (!trimmed.startsWith("data: ") || trimmed === "data: [DONE]") {
    return undefined;
  }
  try {
    const chunk = JSON.parse(trimmed.slice(6)) as {
      choices?: { delta?: { content?: unknown } }[];
    } | null;
    const token = chunk?.choices?.[0]?.delta?.content;
    return typeof token === "string" && token ? token : undefined;
  } catch {
    // Frames are reassembled across network chunks before parsing, so a
    // payload that still fails to parse is simply skipped (best effort).
    return undefined;
  }
}

/**
 * Stream a chat completion from vLLM, yielding content tokens as they arrive.
 *
 * The system prompt built from `brief` is prepended to `messages`, and the
 * request is sent with `stream: true`. The response body is read as
 * newline-delimited `data: ` frames.
 *
 * @param messages - Conversation so far, in chat-completions message format.
 * @param brief - Optional profile used to personalise the system prompt.
 * @returns An async generator of non-empty content tokens.
 * @throws Error when the server answers with a non-OK status or without a body.
 *   Rejections from `fetch` / the body reader (including the 30-second abort
 *   signal attached to the request) propagate unchanged.
 */
export async function* streamChat(
  messages: { role: string; content: string }[],
  brief?: CommunicationBrief
): AsyncGenerator<string> {
  const systemPrompt = buildSystemPrompt(brief);
  const res = await fetch(`${VLLM_BASE}/v1/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: MODEL,
      messages: [{ role: "system", content: systemPrompt }, ...messages],
      stream: true,
      temperature: 0.7,
      top_p: 0.8,
      top_k: 20,
      max_tokens: 1024,
      chat_template_kwargs: { enable_thinking: false },
    }),
    // NOTE(review): this signal presumably also aborts body streaming, which
    // would cap the whole generation at 30 s — confirm that is intended.
    signal: AbortSignal.timeout(30_000),
  });
  if (!res.ok) {
    throw new Error(`vLLM returned ${res.status}`);
  }
  if (!res.body) {
    throw new Error("vLLM returned an empty response body");
  }

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let finished = false;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      // Everything before the last "\n" is complete; keep the tail (possibly a
      // partial frame) for the next chunk.
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? "";
      for (const line of lines) {
        const token = parseSseToken(line);
        if (token) yield token;
      }
    }
    finished = true;
    // Flush the decoder and handle a final frame that was not newline-terminated;
    // without this the last token of such a stream would be dropped.
    buffer += decoder.decode();
    const lastToken = parseSseToken(buffer);
    if (lastToken) yield lastToken;
  } finally {
    if (!finished) {
      // Early exit (consumer stopped iterating, or an error): cancel the stream
      // rather than leaving it open after the lock is released.
      try {
        await reader.cancel();
      } catch {
        // The stream may already be errored; the original failure still propagates.
      }
    }
    reader.releaseLock();
  }
}
/**
 * Non-streaming completion for observer.
 *
 * Posts `messages` to the chat-completions endpoint as given (no system prompt
 * is added here) and resolves to the content of the first choice.
 *
 * @param messages - Messages to send, in chat-completions format.
 * @returns The first choice's message content, or "" when the response has none.
 * @throws Error when the server responds with a non-OK status.
 */
export async function complete(
  messages: { role: string; content: string }[]
): Promise<string> {
  const endpoint = `${VLLM_BASE}/v1/chat/completions`;
  const payload = {
    model: MODEL,
    messages,
    temperature: 0.3,
    top_p: 0.9,
    max_tokens: 512,
    chat_template_kwargs: { enable_thinking: false },
  };

  const response = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(30_000),
  });

  if (response.ok) {
    const parsed = await response.json();
    return parsed.choices?.[0]?.message?.content ?? "";
  }
  throw new Error(`vLLM returned ${response.status}`);
}