// tidaldb/applications/iknowyou/lib/cohorts.ts

import { remember, recallByTag } from "./synap";
import type {
  ObserverOutput,
  PersonProfile,
  CohortDefinition,
  CohortMembership,
} from "./types";

// ---------------------------------------------------------------------------
// Cohort definitions — rule-based behavioral clusters
// ---------------------------------------------------------------------------

/**
 * Rule table for the behavioral cohorts.
 *
 * Every entry pairs a cohort name and human-readable description with a
 * `match` scorer. A scorer returns 0 when the profile does not meet the
 * cohort's threshold; otherwise it returns a base score plus a bonus that
 * grows the further the profile sits past the threshold. Some scorers can
 * exceed 1 (e.g. "positive-engager", "terse"); only "verbose" clamps itself.
 */
export const COHORT_DEFINITIONS: CohortDefinition[] = [
  {
    name: "casual",
    description: "Uses informal, relaxed communication style",
    match: (p) => {
      // Below 0.4 formality; score rises as formality drops.
      if (p.avgFormality < 0.4) {
        return 0.6 + (0.4 - p.avgFormality);
      }
      return 0;
    },
  },
  {
    name: "formal",
    description: "Uses structured, professional communication style",
    match: (p) => {
      // 0.6 formality and above; score rises with formality.
      if (p.avgFormality >= 0.6) {
        return 0.5 + (p.avgFormality - 0.6);
      }
      return 0;
    },
  },
  {
    name: "technical",
    description: "Uses jargon and goes beyond surface-level specificity",
    match: (p) => {
      // Needs both: jargon in more than half of the messages AND depth.
      if (p.jargonRate > 0.5 && p.topSpecificity !== "surface") {
        return 0.5 + p.jargonRate * 0.4;
      }
      return 0;
    },
  },
  {
    name: "accessible",
    description: "Avoids jargon or stays at surface-level specificity",
    match: (p) => {
      // Either condition is enough: little jargon OR surface-level topics.
      if (p.jargonRate < 0.3 || p.topSpecificity === "surface") {
        return 0.6 + (1 - p.jargonRate) * 0.3;
      }
      return 0;
    },
  },
  {
    name: "leader",
    description: "Tends to steer the conversation direction",
    match: (p) => {
      if (p.leaderRate > 0.6) {
        return 0.5 + (p.leaderRate - 0.6);
      }
      return 0;
    },
  },
  {
    name: "responder",
    description: "Follows the system's conversation lead",
    match: (p) => {
      if (p.leaderRate < 0.4) {
        return 0.5 + (0.4 - p.leaderRate);
      }
      return 0;
    },
  },
  {
    name: "positive-engager",
    description: "Generally positive sentiment in exchanges",
    match: (p) => {
      // Bonus is doubled, so the raw score passes 1 once sentiment exceeds 0.9.
      if (p.avgSentiment > 0.65) {
        return 0.5 + (p.avgSentiment - 0.65) * 2;
      }
      return 0;
    },
  },
  {
    name: "verbose",
    description: "Writes longer, more detailed messages",
    match: (p) => {
      // Over 40 words on average; capped at 1 (reached at 80 words).
      if (p.avgWordCount > 40) {
        return Math.min(1, 0.5 + (p.avgWordCount - 40) / 80);
      }
      return 0;
    },
  },
  {
    name: "terse",
    description: "Writes short, concise messages",
    match: (p) => {
      // Under 15 words on average; score rises as messages get shorter.
      if (p.avgWordCount < 15) {
        return 0.6 + (15 - p.avgWordCount) / 30;
      }
      return 0;
    },
  },
];

// ---------------------------------------------------------------------------
// Profile computation — running average from ObserverOutput[]
// ---------------------------------------------------------------------------

/**
 * Fold a batch of observer signals into a person's running profile.
 *
 * Numeric traits (formality, sentiment, word count, jargon rate, leader rate)
 * are combined with the existing profile as a running average weighted by
 * interaction counts. Two fields are approximations because only summaries
 * are kept on the profile:
 * - `topSpecificity` is the most frequent level in the NEW batch only.
 * - `topDomains` credits every previously stored domain with `prevCount`
 *   occurrences before adding the new batch's counts, then keeps the top 5.
 *
 * @param signals  New observer outputs to fold in (may be empty).
 * @param existing Previously computed profile, if any.
 * @param personId Overrides the id; falls back to `existing.personId`, then "unknown".
 * @returns `existing` itself (same reference) when there are no new signals;
 *          a neutral profile with no cohorts when there is no data at all;
 *          otherwise a new profile with cohorts assigned via `assignCohorts`.
 */
export function computeProfile(
  signals: ObserverOutput[],
  existing?: PersonProfile | null,
  personId?: string
): PersonProfile {
  // Nothing new to fold in: the stored profile is already current.
  if (!signals.length && existing) return existing;

  const id = personId ?? existing?.personId ?? "unknown";

  // No signals and no history. Averaging nothing would yield all-zero traits,
  // which the cohort rules would score as casual/accessible/responder/terse
  // without any evidence. Return neutral defaults (the same ones loadProfile
  // falls back to) and leave the cohort list empty instead.
  if (!signals.length) {
    return {
      personId: id,
      interactionCount: 0,
      avgFormality: 0.5,
      avgSentiment: 0.5,
      avgWordCount: 20,
      jargonRate: 0,
      leaderRate: 0.5,
      topSpecificity: "surface",
      topDomains: [],
      cohorts: [],
      updatedAt: Date.now(),
    };
  }

  const prevCount = existing?.interactionCount ?? 0;
  const n = signals.length;
  const totalCount = prevCount + n;

  // Counters are Maps rather than plain objects: the keys are free-form
  // strings, and names like "constructor" or "__proto__" would otherwise
  // collide with Object.prototype members and corrupt or drop the count.
  const bump = (counts: Map<string, number>, key: string, by = 1): void => {
    counts.set(key, (counts.get(key) ?? 0) + by);
  };

  // Totals over the new batch
  let sumFormality = 0;
  let sumSentiment = 0;
  let sumWordCount = 0;
  let jargonTrue = 0;
  let leaderCount = 0;
  const specificityCount = new Map<string, number>();
  const domainCount = new Map<string, number>();

  for (const s of signals) {
    sumFormality += s.style.formality;
    sumSentiment += s.engagement.sentiment_score;
    sumWordCount += s.engagement.word_count;
    if (s.style.uses_jargon) jargonTrue++;
    if (s.dynamics.who_is_leading === "person") leaderCount++;

    bump(specificityCount, s.topic.specificity);
    bump(domainCount, s.topic.domain);
  }

  // Blend the batch average with the stored average, weighting each side by
  // the number of interactions it represents.
  const blend = (newAvg: number, oldAvg: number | undefined): number => {
    if (oldAvg === undefined || prevCount === 0) return newAvg;
    return (oldAvg * prevCount + newAvg * n) / totalCount;
  };

  const avgFormality = blend(sumFormality / n, existing?.avgFormality);
  const avgSentiment = blend(sumSentiment / n, existing?.avgSentiment);
  const avgWordCount = blend(sumWordCount / n, existing?.avgWordCount);
  const jargonRate = blend(jargonTrue / n, existing?.jargonRate);
  const leaderRate = blend(leaderCount / n, existing?.leaderRate);

  // Most frequent specificity in this batch (ties keep first-seen order).
  const topSpecificity = (
    [...specificityCount.entries()].sort((a, b) => b[1] - a[1])[0]?.[0] ??
    existing?.topSpecificity ??
    "surface"
  ) as "surface" | "intermediate" | "expert";

  // Top domains: stored domains are seeded with prevCount each (their real
  // counts are not persisted), then the batch counts are added on top.
  const mergedDomains = new Map<string, number>();
  if (existing?.topDomains) {
    for (const d of existing.topDomains) {
      bump(mergedDomains, d, prevCount);
    }
  }
  for (const [d, c] of domainCount) {
    bump(mergedDomains, d, c);
  }
  const topDomains = [...mergedDomains.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5)
    .map(([d]) => d);

  const profile: PersonProfile = {
    personId: id,
    interactionCount: totalCount,
    avgFormality,
    avgSentiment,
    avgWordCount,
    jargonRate,
    leaderRate,
    topSpecificity,
    topDomains,
    cohorts: [],
    updatedAt: Date.now(),
  };

  // Cohorts are derived from the freshly blended traits.
  profile.cohorts = assignCohorts(profile);

  return profile;
}

// ---------------------------------------------------------------------------
// Cohort assignment
// ---------------------------------------------------------------------------

/**
 * Score a profile against every cohort rule and return the memberships that
 * clear the 0.3 threshold, strongest first.
 *
 * Raw scores may exceed 1; the stored probability is capped at 1. Cohorts
 * with equal probability keep their order from COHORT_DEFINITIONS.
 *
 * @param profile Profile whose traits are fed to each rule's `match`.
 * @returns Memberships sorted by descending probability (possibly empty).
 */
export function assignCohorts(profile: PersonProfile): CohortMembership[] {
  const ranked = COHORT_DEFINITIONS.map((definition) => ({
    cohort: definition.name,
    score: definition.match(profile),
  }))
    .filter(({ score }) => score > 0.3)
    .map(
      ({ cohort, score }): CohortMembership => ({
        cohort,
        probability: Math.min(1, score),
      })
    );

  ranked.sort((left, right) => right.probability - left.probability);
  return ranked;
}

// ---------------------------------------------------------------------------
// Cohort prior loading — query Synap for profiles of similar people
// ---------------------------------------------------------------------------

/**
 * Build short natural-language "prior" lines describing the cohorts a person
 * belongs to, for cohorts that already have stored profiles in Synap.
 *
 * The priors carry a weight that fades as individual data accumulates:
 * `1 / (1 + n / 10)`, i.e. 1.0 at n=0, 0.5 at n=10, 0.25 at n=30. Once the
 * weight drops below 0.1 the priors are skipped entirely.
 *
 * Only the first three entries of `cohorts` are consulted (the three most
 * probable when the list comes from `assignCohorts`, which sorts descending).
 * Their lookups are independent, so they run concurrently. A failed lookup is
 * logged and that cohort is simply left out.
 *
 * @param cohorts          Memberships to look up.
 * @param interactionCount Number of individual interactions seen so far.
 * @returns Prior lines in the same order as `cohorts`, preceded by a
 *          confidence header when at least one prior was found; otherwise [].
 */
export async function loadCohortPriors(
  cohorts: CohortMembership[],
  interactionCount: number
): Promise<string[]> {
  if (!cohorts.length) return [];

  // Weight: fades as individual data grows
  // 1/(1 + n/10) → 1.0 at n=0, 0.5 at n=10, 0.25 at n=30
  const weight = 1 / (1 + interactionCount / 10);
  if (weight < 0.1) {
    console.log(
      `[cohorts] skipping priors — weight ${weight.toFixed(2)} too low (${interactionCount} interactions)`
    );
    return [];
  }

  // Query Synap for profiles tagged with matching cohorts. Promise.all keeps
  // the results in input order, and each lookup catches its own error so one
  // failure cannot reject the whole batch.
  const lookups = cohorts
    .slice(0, 3)
    .map(async ({ cohort, probability }): Promise<string | null> => {
      try {
        // NOTE(review): 5 and 0.2 are presumably a result limit and a minimum
        // relevance score — confirm against recallByTag in ./synap.
        const result = await recallByTag(
          "person communication style",
          ["person-profile", `cohort:${cohort}`],
          5,
          0.2
        );

        const all = [
          ...result.memories.vivid,
          ...result.memories.associated,
        ];
        // Recalled memories only prove the cohort has stored members; their
        // content is not included in the prior line.
        if (!all.length) return null;

        const desc =
          COHORT_DEFINITIONS.find((d) => d.name === cohort)?.description ??
          cohort;
        return `People in the "${cohort}" group (${desc}) — weight: ${(weight * probability).toFixed(2)}`;
      } catch (err) {
        console.error(`[cohorts] failed to load priors for ${cohort}:`, err);
        return null;
      }
    });

  const priors = (await Promise.all(lookups)).filter(
    (line): line is string => line !== null
  );

  // Add the weight context for the LLM
  if (priors.length) {
    priors.unshift(
      `Cohort prior confidence: ${(weight * 100).toFixed(0)}% (${interactionCount} individual interactions so far)`
    );
  }

  return priors;
}

// ---------------------------------------------------------------------------
// Synap persistence
// ---------------------------------------------------------------------------

// In-process cache of profiles keyed by personId. storeProfile writes every
// profile it is given here, and loadProfile consults it before querying Synap.
// Nothing in the code shown here evicts or expires entries.
const profileCache = new Map<string, PersonProfile>();

/**
 * Cache a profile in memory and persist a rounded JSON snapshot of it to Synap.
 *
 * The snapshot is tagged with "person-profile", the person's id, and one
 * `cohort:<name>` tag per membership so it can later be recalled by cohort.
 * Persistence is best-effort: a failing `remember` call is logged and
 * swallowed, and the in-memory cache entry stays in place either way.
 * `updatedAt` is not part of the stored snapshot.
 *
 * @param profile Profile to cache and persist.
 */
export async function storeProfile(profile: PersonProfile): Promise<void> {
  // Cache first so later loads see the profile even if persistence fails.
  profileCache.set(profile.personId, profile);

  const roundTo = (value: number, digits: number): number =>
    Number(value.toFixed(digits));

  const tags = ["person-profile", `person:${profile.personId}`];
  for (const membership of profile.cohorts) {
    tags.push(`cohort:${membership.cohort}`);
  }

  // Averages are rounded to keep the stored text compact.
  const snapshot = {
    personId: profile.personId,
    interactionCount: profile.interactionCount,
    avgFormality: roundTo(profile.avgFormality, 3),
    avgSentiment: roundTo(profile.avgSentiment, 3),
    avgWordCount: roundTo(profile.avgWordCount, 1),
    jargonRate: roundTo(profile.jargonRate, 3),
    leaderRate: roundTo(profile.leaderRate, 3),
    topSpecificity: profile.topSpecificity,
    topDomains: profile.topDomains,
    cohorts: profile.cohorts,
  };
  const content = JSON.stringify(snapshot);

  try {
    await remember(content, {
      confidence: 0.9,
      memoryType: "semantic",
      tags,
    });

    const names: string[] = [];
    for (const membership of profile.cohorts) {
      names.push(membership.cohort);
    }
    const summary = names.join(", ") || "no cohorts yet";
    console.log(`[cohorts] stored profile for ${profile.personId}: ${summary}`);
  } catch (err) {
    console.error("[cohorts] failed to store profile:", err);
  }
}

/**
 * Load a person's profile, preferring the in-memory cache and falling back to
 * a Synap recall by the "person-profile" and `person:<id>` tags.
 *
 * The recalled memory's content is parsed as JSON and every field is checked
 * for its expected runtime type; a missing or wrong-typed field is replaced
 * by its neutral default rather than copied into the profile. A payload that
 * is not a JSON object is treated as unusable. `updatedAt` is set to the load
 * time. A successfully built profile is cached under `personId`.
 *
 * NOTE(review): only the first recalled memory is used. If Synap keeps older
 * snapshots for the same person, this may not be the latest one — confirm
 * recallByTag's ordering.
 *
 * @param personId Id of the person to look up.
 * @returns The profile, or null when none is stored, the payload is unusable,
 *          or the lookup/parse fails (failures are logged, not thrown).
 */
export async function loadProfile(
  personId: string
): Promise<PersonProfile | null> {
  // Check cache first
  const cached = profileCache.get(personId);
  if (cached) return cached;

  // Field readers: accept a stored value only when it has the expected type.
  const numberOr = (value: unknown, fallback: number): number =>
    typeof value === "number" && Number.isFinite(value) ? value : fallback;
  const isSpecificity = (
    value: unknown
  ): value is "surface" | "intermediate" | "expert" =>
    value === "surface" || value === "intermediate" || value === "expert";
  const isMembership = (value: unknown): value is CohortMembership => {
    if (typeof value !== "object" || value === null) return false;
    const entry = value as Record<string, unknown>;
    return (
      typeof entry.cohort === "string" &&
      typeof entry.probability === "number" &&
      Number.isFinite(entry.probability)
    );
  };

  try {
    const result = await recallByTag(
      "person profile",
      ["person-profile", `person:${personId}`],
      1,
      0.2
    );

    const all = [...result.memories.vivid, ...result.memories.associated];
    if (!all.length) return null;

    const parsed: unknown = JSON.parse(all[0].content);
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      // Valid JSON but not a profile object: do not fabricate (or cache) a
      // default profile from it.
      console.error(
        `[cohorts] ignoring malformed profile payload for ${personId}`
      );
      return null;
    }
    const raw = parsed as Record<string, unknown>;

    const profile: PersonProfile = {
      personId: typeof raw.personId === "string" ? raw.personId : personId,
      interactionCount: Math.max(0, numberOr(raw.interactionCount, 0)),
      avgFormality: numberOr(raw.avgFormality, 0.5),
      avgSentiment: numberOr(raw.avgSentiment, 0.5),
      avgWordCount: numberOr(raw.avgWordCount, 20),
      jargonRate: numberOr(raw.jargonRate, 0),
      leaderRate: numberOr(raw.leaderRate, 0.5),
      topSpecificity: isSpecificity(raw.topSpecificity)
        ? raw.topSpecificity
        : "surface",
      topDomains: Array.isArray(raw.topDomains)
        ? raw.topDomains.filter((d): d is string => typeof d === "string")
        : [],
      cohorts: Array.isArray(raw.cohorts)
        ? raw.cohorts.filter(isMembership)
        : [],
      updatedAt: Date.now(),
    };

    profileCache.set(personId, profile);
    return profile;
  } catch (err) {
    console.error("[cohorts] failed to load profile:", err);
    return null;
  }
}