chore: reuse reqwest::Client across requests in forage embedder; minor forage updates
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
213b8efcca
commit
c1c5a10fbc
@ -53,7 +53,12 @@ struct Args {
|
|||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
enum Mode {
|
enum Mode {
|
||||||
Mock,
|
Mock,
|
||||||
OpenAi { api_key: String },
|
/// `client` is created once at startup and reused across requests.
|
||||||
|
/// `reqwest::Client` is cheaply cloneable (`Arc`-backed connection pool).
|
||||||
|
OpenAi {
|
||||||
|
api_key: String,
|
||||||
|
client: reqwest::Client,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
@ -70,7 +75,7 @@ struct EmbedResp {
|
|||||||
async fn post_embed(State(mode): State<Arc<Mode>>, Json(req): Json<EmbedReq>) -> impl IntoResponse {
|
async fn post_embed(State(mode): State<Arc<Mode>>, Json(req): Json<EmbedReq>) -> impl IntoResponse {
|
||||||
let vector = match mode.as_ref() {
|
let vector = match mode.as_ref() {
|
||||||
Mode::Mock => mock_embed(&req.text),
|
Mode::Mock => mock_embed(&req.text),
|
||||||
Mode::OpenAi { api_key } => match openai_embed(api_key, &req.text).await {
|
Mode::OpenAi { api_key, client } => match openai_embed(client, api_key, &req.text).await {
|
||||||
Ok(v) => v,
|
Ok(v) => v,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
return (
|
return (
|
||||||
@ -119,8 +124,11 @@ fn mock_embed(text: &str) -> Vec<f32> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// OpenAI text-embedding-3-small call.
|
/// OpenAI text-embedding-3-small call.
|
||||||
async fn openai_embed(api_key: &str, text: &str) -> Result<Vec<f32>, String> {
|
async fn openai_embed(
|
||||||
let client = reqwest::Client::new();
|
client: &reqwest::Client,
|
||||||
|
api_key: &str,
|
||||||
|
text: &str,
|
||||||
|
) -> Result<Vec<f32>, String> {
|
||||||
let resp = client
|
let resp = client
|
||||||
.post("https://api.openai.com/v1/embeddings")
|
.post("https://api.openai.com/v1/embeddings")
|
||||||
.bearer_auth(api_key)
|
.bearer_auth(api_key)
|
||||||
@ -178,7 +186,10 @@ async fn main() {
|
|||||||
Mode::Mock
|
Mode::Mock
|
||||||
} else {
|
} else {
|
||||||
println!("forage-embedder: OpenAI mode (text-embedding-3-small)");
|
println!("forage-embedder: OpenAI mode (text-embedding-3-small)");
|
||||||
Mode::OpenAi { api_key: key }
|
Mode::OpenAi {
|
||||||
|
api_key: key,
|
||||||
|
client: reqwest::Client::new(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -1175,10 +1175,10 @@ fn call_embedder(
|
|||||||
/// 3. Strip a trailing slash from the path unless the path is the root `/`.
|
/// 3. Strip a trailing slash from the path unless the path is the root `/`.
|
||||||
fn canonicalize_url(url: &str) -> String {
|
fn canonicalize_url(url: &str) -> String {
|
||||||
// 1. Strip amp. subdomain
|
// 1. Strip amp. subdomain
|
||||||
let s = if url.starts_with("https://amp.") {
|
let s = if let Some(rest) = url.strip_prefix("https://amp.") {
|
||||||
format!("https://{}", &url["https://amp.".len()..])
|
format!("https://{rest}")
|
||||||
} else if url.starts_with("http://amp.") {
|
} else if let Some(rest) = url.strip_prefix("http://amp.") {
|
||||||
format!("http://{}", &url["http://amp.".len()..])
|
format!("http://{rest}")
|
||||||
} else {
|
} else {
|
||||||
url.to_owned()
|
url.to_owned()
|
||||||
};
|
};
|
||||||
@ -1199,7 +1199,7 @@ fn canonicalize_url(url: &str) -> String {
|
|||||||
// Find position of the first slash after "://"
|
// Find position of the first slash after "://"
|
||||||
let after_scheme = base.find("://").map_or(0, |i| i + 3);
|
let after_scheme = base.find("://").map_or(0, |i| i + 3);
|
||||||
let first_path_slash = base[after_scheme..].find('/');
|
let first_path_slash = base[after_scheme..].find('/');
|
||||||
let has_real_path = first_path_slash.map_or(false, |j| base.len() > after_scheme + j + 1);
|
let has_real_path = first_path_slash.is_some_and(|j| base.len() > after_scheme + j + 1);
|
||||||
if has_real_path && base.ends_with('/') {
|
if has_real_path && base.ends_with('/') {
|
||||||
base[..base.len() - 1].to_owned()
|
base[..base.len() - 1].to_owned()
|
||||||
} else {
|
} else {
|
||||||
@ -1229,6 +1229,73 @@ fn canonicalize_url(url: &str) -> String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod canon_tests {
    use super::canonicalize_url;

    /// Asserts that canonicalizing `input` yields exactly `expected`.
    ///
    /// `#[track_caller]` makes a failing assertion report the line of the
    /// calling test instead of this helper.
    #[track_caller]
    fn assert_canon(input: &str, expected: &str) {
        assert_eq!(canonicalize_url(input), expected);
    }

    /// An `https://amp.` host prefix is expected to collapse to `https://`.
    #[test]
    fn strips_amp_subdomain_https() {
        assert_canon(
            "https://amp.example.com/article/123",
            "https://example.com/article/123",
        );
    }

    /// An `http://amp.` host prefix is expected to collapse to `http://`.
    #[test]
    fn strips_amp_subdomain_http() {
        assert_canon("http://amp.cnn.com/story", "http://cnn.com/story");
    }

    /// When `amp=1` is the only query parameter, the whole query string is
    /// expected to disappear.
    #[test]
    fn removes_amp_query_param_standalone() {
        assert_canon(
            "https://example.com/article?amp=1",
            "https://example.com/article",
        );
    }

    /// `_tf=1` is expected to be dropped while the parameters on either side
    /// of it survive in their original order.
    #[test]
    fn removes_amp_tf_query_param_among_others() {
        assert_canon(
            "https://example.com/a?q=rust&_tf=1&page=2",
            "https://example.com/a?q=rust&page=2",
        );
    }

    /// A trailing slash after a non-root path is expected to be removed.
    #[test]
    fn strips_trailing_slash_from_real_path() {
        assert_canon("https://example.com/article/", "https://example.com/article");
    }

    /// The lone `/` of a root URL is expected to be kept.
    #[test]
    fn preserves_root_slash() {
        assert_canon("https://example.com/", "https://example.com/");
    }

    /// A URL with only ordinary query parameters is expected to come back
    /// unchanged.
    #[test]
    fn preserves_meaningful_query_params() {
        let unchanged = "https://example.com/search?q=rust&lang=en";
        assert_canon(unchanged, unchanged);
    }

    /// Subdomain stripping, `amp` parameter removal and trailing-slash
    /// removal are expected to all apply to the same URL.
    #[test]
    fn amp_subdomain_plus_amp_param_both_stripped() {
        assert_canon(
            "https://amp.example.com/post/?amp=1",
            "https://example.com/post",
        );
    }
}
|
||||||
|
|
||||||
/// Round-robin interleave items by category to ensure the cold-start exploit
|
/// Round-robin interleave items by category to ensure the cold-start exploit
|
||||||
/// pool spans ≥3 categories. Preserves score ordering within each category.
|
/// pool spans ≥3 categories. Preserves score ordering within each category.
|
||||||
///
|
///
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
"name": "Forage",
|
"name": "Forage",
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
"description": "Automatically capture browsing signals for your Forage personalized feed",
|
"description": "Automatically capture browsing signals for your Forage personalized feed",
|
||||||
"permissions": ["storage"],
|
"permissions": ["storage", "tabs"],
|
||||||
"host_permissions": ["http://*/*", "https://*/*"],
|
"host_permissions": ["http://*/*", "https://*/*"],
|
||||||
"content_scripts": [
|
"content_scripts": [
|
||||||
{
|
{
|
||||||
|
|||||||
@ -15,10 +15,13 @@
|
|||||||
* Configuration:
|
* Configuration:
|
||||||
* Change USER_ID to match the Forage user you are browsing as (1, 2, or 3
|
* Change USER_ID to match the Forage user you are browsing as (1, 2, or 3
|
||||||
* for the seed users; any positive integer for a new user).
|
* for the seed users; any positive integer for a new user).
|
||||||
|
* Set TOKEN to the value passed with --token when starting the server,
|
||||||
|
* or leave empty ('') if the server was started without --token.
|
||||||
*/
|
*/
|
||||||
(function forageCapture() {
|
(function forageCapture() {
|
||||||
const SERVER = 'http://localhost:4242';
|
const SERVER = 'http://localhost:4242';
|
||||||
const USER_ID = 1;
|
const USER_ID = 1;
|
||||||
|
const TOKEN = '';
|
||||||
const DWELL_MS = 30_000;
|
const DWELL_MS = 30_000;
|
||||||
|
|
||||||
const url = location.href;
|
const url = location.href;
|
||||||
@ -46,9 +49,11 @@
|
|||||||
|
|
||||||
let itemId = null;
|
let itemId = null;
|
||||||
|
|
||||||
|
const authHeaders = TOKEN ? { Authorization: `Bearer ${TOKEN}` } : {};
|
||||||
|
|
||||||
fetch(`${SERVER}/capture`, {
|
fetch(`${SERVER}/capture`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json', ...authHeaders },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
url,
|
url,
|
||||||
canonical_url: canonicalUrl,
|
canonical_url: canonicalUrl,
|
||||||
@ -73,7 +78,7 @@
|
|||||||
if (itemId == null) return;
|
if (itemId == null) return;
|
||||||
fetch(`${SERVER}/signal`, {
|
fetch(`${SERVER}/signal`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json', ...authHeaders },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
user_id: USER_ID,
|
user_id: USER_ID,
|
||||||
item_id: itemId,
|
item_id: itemId,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user