chore: reuse reqwest::Client across requests in forage embedder; minor forage updates

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
jordan 2026-02-23 23:16:32 -07:00
parent 213b8efcca
commit c1c5a10fbc
4 changed files with 96 additions and 13 deletions

View File

@ -53,7 +53,12 @@ struct Args {
#[derive(Clone)] #[derive(Clone)]
enum Mode { enum Mode {
Mock, Mock,
OpenAi { api_key: String }, /// `client` is created once at startup and reused across requests.
/// `reqwest::Client` is cheaply cloneable (`Arc`-backed connection pool).
OpenAi {
api_key: String,
client: reqwest::Client,
},
} }
#[derive(Deserialize)] #[derive(Deserialize)]
@ -70,7 +75,7 @@ struct EmbedResp {
async fn post_embed(State(mode): State<Arc<Mode>>, Json(req): Json<EmbedReq>) -> impl IntoResponse { async fn post_embed(State(mode): State<Arc<Mode>>, Json(req): Json<EmbedReq>) -> impl IntoResponse {
let vector = match mode.as_ref() { let vector = match mode.as_ref() {
Mode::Mock => mock_embed(&req.text), Mode::Mock => mock_embed(&req.text),
Mode::OpenAi { api_key } => match openai_embed(api_key, &req.text).await { Mode::OpenAi { api_key, client } => match openai_embed(client, api_key, &req.text).await {
Ok(v) => v, Ok(v) => v,
Err(e) => { Err(e) => {
return ( return (
@ -119,8 +124,11 @@ fn mock_embed(text: &str) -> Vec<f32> {
} }
/// OpenAI text-embedding-3-small call. /// OpenAI text-embedding-3-small call.
async fn openai_embed(api_key: &str, text: &str) -> Result<Vec<f32>, String> { async fn openai_embed(
let client = reqwest::Client::new(); client: &reqwest::Client,
api_key: &str,
text: &str,
) -> Result<Vec<f32>, String> {
let resp = client let resp = client
.post("https://api.openai.com/v1/embeddings") .post("https://api.openai.com/v1/embeddings")
.bearer_auth(api_key) .bearer_auth(api_key)
@ -178,7 +186,10 @@ async fn main() {
Mode::Mock Mode::Mock
} else { } else {
println!("forage-embedder: OpenAI mode (text-embedding-3-small)"); println!("forage-embedder: OpenAI mode (text-embedding-3-small)");
Mode::OpenAi { api_key: key } Mode::OpenAi {
api_key: key,
client: reqwest::Client::new(),
}
} }
}; };

View File

@ -1175,10 +1175,10 @@ fn call_embedder(
/// 3. Strip a trailing slash from the path unless the path is the root `/`. /// 3. Strip a trailing slash from the path unless the path is the root `/`.
fn canonicalize_url(url: &str) -> String { fn canonicalize_url(url: &str) -> String {
// 1. Strip amp. subdomain // 1. Strip amp. subdomain
let s = if url.starts_with("https://amp.") { let s = if let Some(rest) = url.strip_prefix("https://amp.") {
format!("https://{}", &url["https://amp.".len()..]) format!("https://{rest}")
} else if url.starts_with("http://amp.") { } else if let Some(rest) = url.strip_prefix("http://amp.") {
format!("http://{}", &url["http://amp.".len()..]) format!("http://{rest}")
} else { } else {
url.to_owned() url.to_owned()
}; };
@ -1199,7 +1199,7 @@ fn canonicalize_url(url: &str) -> String {
// Find position of the first slash after "://" // Find position of the first slash after "://"
let after_scheme = base.find("://").map_or(0, |i| i + 3); let after_scheme = base.find("://").map_or(0, |i| i + 3);
let first_path_slash = base[after_scheme..].find('/'); let first_path_slash = base[after_scheme..].find('/');
let has_real_path = first_path_slash.map_or(false, |j| base.len() > after_scheme + j + 1); let has_real_path = first_path_slash.is_some_and(|j| base.len() > after_scheme + j + 1);
if has_real_path && base.ends_with('/') { if has_real_path && base.ends_with('/') {
base[..base.len() - 1].to_owned() base[..base.len() - 1].to_owned()
} else { } else {
@ -1229,6 +1229,73 @@ fn canonicalize_url(url: &str) -> String {
} }
} }
/// Unit tests for `canonicalize_url`.
///
/// Each test pins one exact input/output pair covering the normalisation steps:
/// dropping an `amp.` subdomain, removing AMP query parameters, and stripping a
/// trailing slash from non-root paths.
#[cfg(test)]
mod canon_tests {
use super::canonicalize_url;
/// A leading `amp.` host label is removed from an `https://` URL; the path is kept.
#[test]
fn strips_amp_subdomain_https() {
assert_eq!(
canonicalize_url("https://amp.example.com/article/123"),
"https://example.com/article/123"
);
}
/// The same `amp.` stripping applies to plain `http://` URLs, and the scheme is preserved.
#[test]
fn strips_amp_subdomain_http() {
assert_eq!(
canonicalize_url("http://amp.cnn.com/story"),
"http://cnn.com/story"
);
}
/// When `amp=1` is the only query parameter, the whole query string (including `?`) goes away.
#[test]
fn removes_amp_query_param_standalone() {
assert_eq!(
canonicalize_url("https://example.com/article?amp=1"),
"https://example.com/article"
);
}
/// `amp_tf` is removed from the middle of a query string; the surrounding
/// parameters are expected to survive in their original order.
#[test]
fn removes_amp_tf_query_param_among_others() {
assert_eq!(
canonicalize_url("https://example.com/a?q=rust&amp_tf=1&page=2"),
"https://example.com/a?q=rust&page=2"
);
}
/// A trailing slash after a non-root path is stripped.
#[test]
fn strips_trailing_slash_from_real_path() {
assert_eq!(
canonicalize_url("https://example.com/article/"),
"https://example.com/article"
);
}
/// The root path `/` is the exception: its slash must be kept.
#[test]
fn preserves_root_slash() {
assert_eq!(
canonicalize_url("https://example.com/"),
"https://example.com/"
);
}
/// A URL with only non-AMP query parameters is expected to come back unchanged.
#[test]
fn preserves_meaningful_query_params() {
let url = "https://example.com/search?q=rust&lang=en";
assert_eq!(canonicalize_url(url), url);
}
/// Combined case: the `amp.` subdomain and the `amp=1` parameter are both removed,
/// and the trailing slash left on `/post/` once the query is gone is stripped too.
#[test]
fn amp_subdomain_plus_amp_param_both_stripped() {
assert_eq!(
canonicalize_url("https://amp.example.com/post/?amp=1"),
"https://example.com/post"
);
}
}
/// Round-robin interleave items by category to ensure the cold-start exploit /// Round-robin interleave items by category to ensure the cold-start exploit
/// pool spans ≥3 categories. Preserves score ordering within each category. /// pool spans ≥3 categories. Preserves score ordering within each category.
/// ///

View File

@ -3,7 +3,7 @@
"name": "Forage", "name": "Forage",
"version": "0.1.0", "version": "0.1.0",
"description": "Automatically capture browsing signals for your Forage personalized feed", "description": "Automatically capture browsing signals for your Forage personalized feed",
"permissions": ["storage"], "permissions": ["storage", "tabs"],
"host_permissions": ["http://*/*", "https://*/*"], "host_permissions": ["http://*/*", "https://*/*"],
"content_scripts": [ "content_scripts": [
{ {

View File

@ -15,10 +15,13 @@
* Configuration: * Configuration:
* Change USER_ID to match the Forage user you are browsing as (1, 2, or 3 * Change USER_ID to match the Forage user you are browsing as (1, 2, or 3
* for the seed users; any positive integer for a new user). * for the seed users; any positive integer for a new user).
* Set TOKEN to the value passed with --token when starting the server,
* or leave empty ('') if the server was started without --token.
*/ */
(function forageCapture() { (function forageCapture() {
const SERVER = 'http://localhost:4242'; const SERVER = 'http://localhost:4242';
const USER_ID = 1; const USER_ID = 1;
const TOKEN = '';
const DWELL_MS = 30_000; const DWELL_MS = 30_000;
const url = location.href; const url = location.href;
@ -46,9 +49,11 @@
let itemId = null; let itemId = null;
const authHeaders = TOKEN ? { Authorization: `Bearer ${TOKEN}` } : {};
fetch(`${SERVER}/capture`, { fetch(`${SERVER}/capture`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json', ...authHeaders },
body: JSON.stringify({ body: JSON.stringify({
url, url,
canonical_url: canonicalUrl, canonical_url: canonicalUrl,
@ -73,7 +78,7 @@
if (itemId == null) return; if (itemId == null) return;
fetch(`${SERVER}/signal`, { fetch(`${SERVER}/signal`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json', ...authHeaders },
body: JSON.stringify({ body: JSON.stringify({
user_id: USER_ID, user_id: USER_ID,
item_id: itemId, item_id: itemId,