chore: reuse reqwest::Client across requests in forage embedder; minor forage updates
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
213b8efcca
commit
c1c5a10fbc
@ -53,7 +53,12 @@ struct Args {
|
||||
#[derive(Clone)]
|
||||
/// Embedding backend that `post_embed` dispatches on; the value is built in `main`.
// NOTE(review): this diff view renders removed and added lines together, so the
// `OpenAi` variant appears twice below. Judging by the hunk header (7 old lines,
// 12 new lines), the one-line form is the pre-change variant and the multi-line
// form carrying `client` is its replacement — confirm against the real file.
enum Mode {
|
||||
/// Vectors come from the local `mock_embed` helper.
Mock,
|
||||
// Pre-change form of the variant (no shared HTTP client).
OpenAi { api_key: String },
|
||||
/// `client` is created once at startup and reused across requests.
|
||||
/// `reqwest::Client` is cheaply cloneable (`Arc`-backed connection pool).
|
||||
OpenAi {
|
||||
// Passed to `openai_embed`, which sends it as the bearer token.
api_key: String,
|
||||
// Shared HTTP client handed to `openai_embed` by reference.
client: reqwest::Client,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
@ -70,7 +75,7 @@ struct EmbedResp {
|
||||
async fn post_embed(State(mode): State<Arc<Mode>>, Json(req): Json<EmbedReq>) -> impl IntoResponse {
|
||||
let vector = match mode.as_ref() {
|
||||
Mode::Mock => mock_embed(&req.text),
|
||||
Mode::OpenAi { api_key } => match openai_embed(api_key, &req.text).await {
|
||||
Mode::OpenAi { api_key, client } => match openai_embed(client, api_key, &req.text).await {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
return (
|
||||
@ -119,8 +124,11 @@ fn mock_embed(text: &str) -> Vec<f32> {
|
||||
}
|
||||
|
||||
/// OpenAI text-embedding-3-small call.
|
||||
async fn openai_embed(api_key: &str, text: &str) -> Result<Vec<f32>, String> {
|
||||
let client = reqwest::Client::new();
|
||||
async fn openai_embed(
|
||||
client: &reqwest::Client,
|
||||
api_key: &str,
|
||||
text: &str,
|
||||
) -> Result<Vec<f32>, String> {
|
||||
let resp = client
|
||||
.post("https://api.openai.com/v1/embeddings")
|
||||
.bearer_auth(api_key)
|
||||
@ -178,7 +186,10 @@ async fn main() {
|
||||
Mode::Mock
|
||||
} else {
|
||||
println!("forage-embedder: OpenAI mode (text-embedding-3-small)");
|
||||
Mode::OpenAi { api_key: key }
|
||||
Mode::OpenAi {
|
||||
api_key: key,
|
||||
client: reqwest::Client::new(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -1175,10 +1175,10 @@ fn call_embedder(
|
||||
/// 3. Strip a trailing slash from the path unless the path is the root `/`.
|
||||
fn canonicalize_url(url: &str) -> String {
|
||||
// 1. Strip amp. subdomain
|
||||
let s = if url.starts_with("https://amp.") {
|
||||
format!("https://{}", &url["https://amp.".len()..])
|
||||
} else if url.starts_with("http://amp.") {
|
||||
format!("http://{}", &url["http://amp.".len()..])
|
||||
let s = if let Some(rest) = url.strip_prefix("https://amp.") {
|
||||
format!("https://{rest}")
|
||||
} else if let Some(rest) = url.strip_prefix("http://amp.") {
|
||||
format!("http://{rest}")
|
||||
} else {
|
||||
url.to_owned()
|
||||
};
|
||||
@ -1199,7 +1199,7 @@ fn canonicalize_url(url: &str) -> String {
|
||||
// Find position of the first slash after "://"
|
||||
let after_scheme = base.find("://").map_or(0, |i| i + 3);
|
||||
let first_path_slash = base[after_scheme..].find('/');
|
||||
let has_real_path = first_path_slash.map_or(false, |j| base.len() > after_scheme + j + 1);
|
||||
let has_real_path = first_path_slash.is_some_and(|j| base.len() > after_scheme + j + 1);
|
||||
if has_real_path && base.ends_with('/') {
|
||||
base[..base.len() - 1].to_owned()
|
||||
} else {
|
||||
@ -1229,6 +1229,73 @@ fn canonicalize_url(url: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
/// Unit tests pinning the observable behaviour of `canonicalize_url`:
/// `amp.` subdomain stripping, removal of AMP-related query parameters, and
/// trailing-slash handling. Each test compares one input URL to its exact
/// expected canonical form.
mod canon_tests {
|
||||
use super::canonicalize_url;
|
||||
|
||||
/// An `https://amp.` host prefix is dropped; scheme and path are kept.
#[test]
|
||||
fn strips_amp_subdomain_https() {
|
||||
assert_eq!(
|
||||
canonicalize_url("https://amp.example.com/article/123"),
|
||||
"https://example.com/article/123"
|
||||
);
|
||||
}
|
||||
|
||||
/// Same as above for plain `http://`; the scheme is not upgraded.
#[test]
|
||||
fn strips_amp_subdomain_http() {
|
||||
assert_eq!(
|
||||
canonicalize_url("http://amp.cnn.com/story"),
|
||||
"http://cnn.com/story"
|
||||
);
|
||||
}
|
||||
|
||||
/// When `amp=1` is the only query parameter, it is removed together with
/// the `?` separator.
#[test]
|
||||
fn removes_amp_query_param_standalone() {
|
||||
assert_eq!(
|
||||
canonicalize_url("https://example.com/article?amp=1"),
|
||||
"https://example.com/article"
|
||||
);
|
||||
}
|
||||
|
||||
/// `_tf=1` is removed from the middle of a query string while the
/// surrounding `q` and `page` parameters survive in their original order.
#[test]
|
||||
fn removes_amp_tf_query_param_among_others() {
|
||||
assert_eq!(
|
||||
canonicalize_url("https://example.com/a?q=rust&_tf=1&page=2"),
|
||||
"https://example.com/a?q=rust&page=2"
|
||||
);
|
||||
}
|
||||
|
||||
/// A trailing `/` after a non-root path is removed.
#[test]
|
||||
fn strips_trailing_slash_from_real_path() {
|
||||
assert_eq!(
|
||||
canonicalize_url("https://example.com/article/"),
|
||||
"https://example.com/article"
|
||||
);
|
||||
}
|
||||
|
||||
/// The root path `/` is left alone: the URL comes back unchanged.
#[test]
|
||||
fn preserves_root_slash() {
|
||||
assert_eq!(
|
||||
canonicalize_url("https://example.com/"),
|
||||
"https://example.com/"
|
||||
);
|
||||
}
|
||||
|
||||
/// A URL with only non-AMP query parameters round-trips unchanged.
#[test]
|
||||
fn preserves_meaningful_query_params() {
|
||||
let url = "https://example.com/search?q=rust&lang=en";
|
||||
assert_eq!(canonicalize_url(url), url);
|
||||
}
|
||||
|
||||
/// All three rewrites combine on one input: the `amp.` subdomain, the
/// `amp=1` parameter, and the trailing slash left behind on `/post/` are
/// all removed.
#[test]
|
||||
fn amp_subdomain_plus_amp_param_both_stripped() {
|
||||
assert_eq!(
|
||||
canonicalize_url("https://amp.example.com/post/?amp=1"),
|
||||
"https://example.com/post"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Round-robin interleave items by category to ensure the cold-start exploit
|
||||
/// pool spans ≥3 categories. Preserves score ordering within each category.
|
||||
///
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
"name": "Forage",
|
||||
"version": "0.1.0",
|
||||
"description": "Automatically capture browsing signals for your Forage personalized feed",
|
||||
"permissions": ["storage"],
|
||||
"permissions": ["storage", "tabs"],
|
||||
"host_permissions": ["http://*/*", "https://*/*"],
|
||||
"content_scripts": [
|
||||
{
|
||||
|
||||
@ -15,10 +15,13 @@
|
||||
* Configuration:
|
||||
* Change USER_ID to match the Forage user you are browsing as (1, 2, or 3
|
||||
* for the seed users; any positive integer for a new user).
|
||||
* Set TOKEN to the value passed with --token when starting the server,
|
||||
* or leave empty ('') if the server was started without --token.
|
||||
*/
|
||||
(function forageCapture() {
|
||||
const SERVER = 'http://localhost:4242';
|
||||
const USER_ID = 1;
|
||||
const TOKEN = '';
|
||||
const DWELL_MS = 30_000;
|
||||
|
||||
const url = location.href;
|
||||
@ -46,9 +49,11 @@
|
||||
|
||||
let itemId = null;
|
||||
|
||||
const authHeaders = TOKEN ? { Authorization: `Bearer ${TOKEN}` } : {};
|
||||
|
||||
fetch(`${SERVER}/capture`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers: { 'Content-Type': 'application/json', ...authHeaders },
|
||||
body: JSON.stringify({
|
||||
url,
|
||||
canonical_url: canonicalUrl,
|
||||
@ -73,7 +78,7 @@
|
||||
if (itemId == null) return;
|
||||
fetch(`${SERVER}/signal`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers: { 'Content-Type': 'application/json', ...authHeaders },
|
||||
body: JSON.stringify({
|
||||
user_id: USER_ID,
|
||||
item_id: itemId,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user