// tidaldb/applications/iknowyou/lib/vllm.ts

import type { CommunicationBrief } from "./types";
import { consumeSSEChunk } from "./sse";

/**
 * Base URL of the vLLM server. Taken from the VLLM_URL environment variable;
 * the hard-coded host is used only when that variable is unset (`??` does not
 * fall back on an empty string).
 */
const VLLM_BASE = process.env.VLLM_URL ?? "http://msd5685.mjhst.com:8000";
/** Model identifier sent as the `model` field of the chat-completion requests below. */
const MODEL = "Qwen/Qwen3-8B";

/**
 * Persona prompt for the chat companion. buildSystemPrompt returns it as-is
 * when there is no brief (or the brief renders to nothing) and otherwise
 * appends the rendered brief after it.
 */
const BASE_SYSTEM_PROMPT = `You are Aeries — a chill, curious companion who genuinely wants to get to know the person you're talking to.

You're not an assistant. You don't help with tasks unless someone asks. You're just here to hang out and talk. Think of yourself as that friend who always asks the good questions and actually remembers your answers.

Your vibe:
- Casual. Lowercase is fine. Short sentences. Real talk.
- Curious — ask questions. Lots of them. Not in an interview way, more like you actually care.
- Match their energy. If they're chill, be chill. If they go deep, go deep.
- Never be performatively cheerful or fake-enthusiastic.
- Don't explain yourself unless asked.

Keep it short — one to three sentences usually. Always end with a question or something that invites them to keep talking. You want to learn about them.`;

/**
 * Render a CommunicationBrief into system prompt sections.
 *
 * Sections appear in a fixed order (style, topics, patterns, observations,
 * cohort priors) and are separated by a blank line. A section with nothing to
 * report is omitted, so the result is "" when the brief carries no signal.
 *
 * @param brief - Profile of the person being talked to.
 * @returns The rendered sections, or an empty string when every section is empty.
 */
function formatBrief(brief: CommunicationBrief): string {
  const { style, topics, patterns, observations, cohortPriors } = brief;
  const sections: string[] = [];

  // Style section. Each line is emitted only when it deviates from the
  // defaults (moderate formality / moderate length / stream-of-thought).
  const styleParts: string[] = [];
  if (style.formality !== "moderate" || style.usesJargon || style.usesEmoji) {
    const formality =
      style.formality === "casual"
        ? "Casual, lowercase"
        : style.formality === "formal"
          ? "Formal, structured"
          : "Moderate formality";
    // NOTE(review): jargon is only ever reported by its absence; usesJargon=true
    // triggers this line but contributes no wording. Confirm that is intended.
    styleParts.push(
      formality +
        (style.usesJargon ? "" : ", avoids jargon") +
        (style.usesEmoji ? ", uses emoji" : "")
    );
  }
  if (style.length !== "moderate" || style.structure !== "stream_of_thought") {
    const length =
      style.length === "terse"
        ? "Short messages"
        : style.length === "verbose"
          ? "Longer, detailed messages"
          : "Medium-length messages";
    styleParts.push(`${length}, ${style.structure.replace(/_/g, " ")} structure`);
  }
  if (styleParts.length) {
    sections.push(`[How they communicate]\n${styleParts.map((s) => `- ${s}`).join("\n")}`);
  }

  // Topics section. Any one of hot / cold / domains is enough to emit it, so
  // a brief that only knows the person's domains still surfaces them.
  const topicParts: string[] = [];
  if (topics.hot.length) {
    const hotStr = topics.hot.map((t) => `${t.topic} (${t.specificity})`).join(", ");
    topicParts.push(`- Hot: ${hotStr}`);
  }
  if (topics.cold.length) {
    const coldStr = topics.cold.map((t) => t.topic).join(", ");
    topicParts.push(`- Previously: ${coldStr}`);
  }
  if (topics.domains.length) {
    topicParts.push(`- Domains: ${topics.domains.join(", ")}`);
  }
  if (topicParts.length) {
    sections.push(`[What they're into]\n${topicParts.join("\n")}`);
  }

  // Patterns section
  const patternParts: string[] = [];
  if (patterns.leadsConversation) {
    patternParts.push("- They lead conversations — follow their thread");
  }
  if (patterns.deepensTopics) {
    patternParts.push("- They deepen topics rather than jumping around");
  }
  // Bucket the average sentiment: above 0.6 is positive, below 0.4 is
  // reserved, anything else (including NaN) is neutral.
  const sentimentLabel =
    patterns.avgSentiment > 0.6
      ? "positive"
      : patterns.avgSentiment < 0.4
        ? "reserved"
        : "neutral";
  const trending = patterns.sentimentTrend !== "stable";
  if (sentimentLabel !== "neutral" || trending) {
    patternParts.push(
      `- Sentiment: ${sentimentLabel}${trending ? ` and ${patterns.sentimentTrend}` : ""}`
    );
  }
  if (patternParts.length) {
    sections.push(`[How they interact]\n${patternParts.join("\n")}`);
  }

  // Observations section
  if (observations.length) {
    sections.push(`[What you've noticed]\n${observations.map((o) => `- ${o}`).join("\n")}`);
  }

  // Cohort priors section: shown only while active, with the weight rendered
  // as a whole-number percentage.
  if (cohortPriors.active && cohortPriors.priors.length) {
    const confidence = Math.round(cohortPriors.weight * 100);
    sections.push(
      `[People like them (${confidence}% confidence)]\n${cohortPriors.priors.map((p) => `- ${p}`).join("\n")}`
    );
  }

  return sections.join("\n\n");
}

/**
 * Assemble the system prompt for a chat turn.
 *
 * Falls back to the bare persona prompt when no brief is supplied or when the
 * brief renders to nothing; otherwise the rendered brief and a short usage
 * hint are appended to the persona prompt.
 */
function buildSystemPrompt(brief?: CommunicationBrief): string {
  const renderedBrief = brief ? formatBrief(brief) : "";
  if (!renderedBrief) {
    return BASE_SYSTEM_PROMPT;
  }

  const pieces = [
    BASE_SYSTEM_PROMPT,
    "\n\n",
    renderedBrief,
    "\n\nUse this naturally — don't announce it or list it. Match their style.",
  ];
  return pieces.join("");
}

/**
 * Stream a chat completion from vLLM, yielding content tokens as they arrive.
 *
 * The system prompt (persona plus the optional brief) is prepended to
 * `messages`, and the response is read as server-sent events. The request
 * carries a 30 s abort timeout.
 *
 * If the consumer stops iterating early, or reading fails part-way, the
 * response stream is cancelled so the underlying connection is not left
 * streaming into the void.
 *
 * @param messages - Conversation so far as role/content pairs.
 * @param brief - Optional profile used to personalise the system prompt.
 * @returns Async generator of non-empty content deltas.
 * @throws Error when vLLM answers with a non-OK status or without a body;
 *   errors from `fetch` and from reading the stream propagate unchanged.
 */
export async function* streamChat(
  messages: { role: string; content: string }[],
  brief?: CommunicationBrief
): AsyncGenerator<string> {
  const systemPrompt = buildSystemPrompt(brief);

  const res = await fetch(`${VLLM_BASE}/v1/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: MODEL,
      messages: [{ role: "system", content: systemPrompt }, ...messages],
      stream: true,
      temperature: 0.7,
      top_p: 0.8,
      top_k: 20,
      max_tokens: 1024,
      chat_template_kwargs: { enable_thinking: false },
    }),
    signal: AbortSignal.timeout(30_000),
  });

  if (!res.ok) {
    // Discard the unread error body so the connection can be released.
    await res.body?.cancel().catch(() => undefined);
    throw new Error(`vLLM returned ${res.status}`);
  }
  if (!res.body) {
    throw new Error("vLLM returned no response body");
  }

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let finished = false;

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        finished = true;
        break;
      }

      const { jsonLines, buffer: next } = consumeSSEChunk(
        buffer,
        decoder.decode(value, { stream: true })
      );
      buffer = next;

      for (const jsonStr of jsonLines) {
        let chunk: { choices?: { delta?: { content?: unknown } }[] } | null;
        try {
          chunk = JSON.parse(jsonStr);
        } catch {
          // Malformed SSE chunks are expected from partial frame splits — skip silently
          continue;
        }
        // The yield lives outside the try/catch above so that an error thrown
        // into the generator by the consumer is not mistaken for a parse error.
        const token = chunk?.choices?.[0]?.delta?.content;
        if (typeof token === "string" && token) yield token;
      }
    }
  } finally {
    // Early exit (consumer stopped, or read failed): tear the stream down.
    // cancel() rejects on an already-errored stream; that error is the one
    // already propagating, so it is ignored here.
    if (!finished) await reader.cancel().catch(() => undefined);
    reader.releaseLock();
  }
}

/**
 * Non-streaming completion for observer.
 *
 * Sends `messages` unchanged (no system prompt is added here) and returns the
 * first choice's message content. The request carries a 30 s abort timeout.
 *
 * @param messages - Full message list as role/content pairs.
 * @returns The completion text, or "" when the response has no string content.
 * @throws Error when vLLM answers with a non-OK status; errors from `fetch`
 *   and from JSON decoding propagate unchanged.
 */
export async function complete(
  messages: { role: string; content: string }[]
): Promise<string> {
  const res = await fetch(`${VLLM_BASE}/v1/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: MODEL,
      messages,
      temperature: 0.3,
      top_p: 0.9,
      max_tokens: 512,
      chat_template_kwargs: { enable_thinking: false },
    }),
    signal: AbortSignal.timeout(30_000),
  });

  if (!res.ok) {
    // Discard the unread error body so the connection can be released.
    await res.body?.cancel().catch(() => undefined);
    throw new Error(`vLLM returned ${res.status}`);
  }

  // The payload is untrusted JSON: tolerate a null/odd shape and only hand
  // back content that really is a string, as the return type promises.
  const data: { choices?: { message?: { content?: unknown } }[] } | null =
    await res.json();
  const content = data?.choices?.[0]?.message?.content;
  return typeof content === "string" ? content : "";
}