Skip to content

Commit 8ade09d

Browse files
Added the ability to configure the hybrid keyword in the search
1 parent 8a40abb commit 8ade09d

File tree

1 file changed

+132
-3
lines changed

1 file changed

+132
-3
lines changed

src/search.rs

+132-3
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,22 @@ pub enum Selectors<T> {
138138
All,
139139
}
140140

141+
/// EXPERIMENTAL: settings of the `hybrid` search parameter, which mixes keyword search
/// with semantic (embedder-based) search.
///
/// Field names are serialized in camelCase (`embedder`, `semanticRatio`).
#[cfg(feature = "experimental-vector-search")]
#[derive(Debug, Serialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct HybridSearch<'a> {
    /// Indicates one of the embedders configured for the queried index
    ///
    /// **Default: `"default"`**
    embedder: &'a str,
    /// Number between `0` and `1`:
    /// - `0.0` indicates full keyword search
    /// - `1.0` indicates full semantic search
    ///
    /// **Default: `0.5`**
    semantic_ratio: f32,
}
156+
141157
// Pair of a borrowed string slice and an optional `usize`.
// NOTE(review): presumably (attribute name, optional crop length) — confirm against usages.
type AttributeToCrop<'a> = (&'a str, Option<usize>);
142158

143159
/// A struct representing a query.
@@ -328,6 +344,12 @@ pub struct SearchQuery<'a, Http: HttpClient> {
328344

329345
#[serde(skip_serializing_if = "Option::is_none")]
330346
pub(crate) index_uid: Option<&'a str>,
347+
348+
/// EXPERIMENTAL
349+
/// Defines whether to utilise previously defined embedders for semantic searching
350+
#[cfg(feature = "experimental-vector-search")]
351+
#[serde(skip_serializing_if = "Option::is_none")]
352+
pub hybrid: Option<HybridSearch<'a>>,
331353
}
332354

333355
#[allow(missing_docs)]
@@ -356,6 +378,8 @@ impl<'a, Http: HttpClient> SearchQuery<'a, Http> {
356378
show_ranking_score: None,
357379
matching_strategy: None,
358380
index_uid: None,
381+
#[cfg(feature = "experimental-vector-search")]
382+
hybrid: None,
359383
}
360384
}
361385
pub fn with_query<'b>(&'b mut self, query: &'a str) -> &'b mut SearchQuery<'a, Http> {
@@ -539,6 +563,20 @@ impl<'a, Http: HttpClient> SearchQuery<'a, Http> {
539563
self.index_uid = Some(&self.index.uid);
540564
self
541565
}
566+
/// EXPERIMENTAL: sets the `hybrid` parameter of this query.
///
/// Builds a [`HybridSearch`] from `embedder` and `semantic_ratio`, stores it in
/// `self.hybrid` (overwriting any earlier value) and hands back `self` so that
/// further builder calls can be chained.
#[cfg(feature = "experimental-vector-search")]
pub fn with_hybrid<'b>(
    &'b mut self,
    embedder: &'a str,
    semantic_ratio: f32,
) -> &'b mut SearchQuery<'a, Http> {
    let hybrid = HybridSearch {
        embedder,
        semantic_ratio,
    };
    self.hybrid = Some(hybrid);
    self
}
578+
579+
#[must_use]
542580
pub fn build(&mut self) -> SearchQuery<'a, Http> {
543581
self.clone()
544582
}
@@ -612,6 +650,7 @@ mod tests {
612650
use meilisearch_test_macro::meilisearch_test;
613651
use serde::{Deserialize, Serialize};
614652
use serde_json::{json, Map, Value};
653+
use std::time::Duration;
615654

616655
#[derive(Debug, Serialize, Deserialize, PartialEq)]
617656
struct Nested {
@@ -654,9 +693,15 @@ mod tests {
654693
.await?;
655694
let t2 = index.set_sortable_attributes(["title"]).await?;
656695

657-
t2.wait_for_completion(client, None, None).await?;
658-
t1.wait_for_completion(client, None, None).await?;
659-
t0.wait_for_completion(client, None, None).await?;
696+
// the vector search has longer indexing times leading to the timeout being triggered
697+
let timeout = if cfg!(feature = "experimental-vector-search") {
698+
Some(Duration::from_secs(120))
699+
} else {
700+
None
701+
};
702+
t2.wait_for_completion(client, None, timeout).await?;
703+
t1.wait_for_completion(client, None, timeout).await?;
704+
t0.wait_for_completion(client, None, timeout).await?;
660705

661706
Ok(())
662707
}
@@ -1174,4 +1219,88 @@ mod tests {
11741219

11751220
Ok(())
11761221
}
1222+
1223+
// Integration test for the experimental hybrid search parameter.
//
// Steps: enable the vector store feature, register a HuggingFace embedder named
// "default" on the index, fill the index via `setup_test_index`, then run the same
// queries with `semantic_ratio` set to `1.0` and `0.0` and compare the hits.
#[cfg(feature = "experimental-vector-search")]
#[meilisearch_test]
async fn test_hybrid(client: Client, index: Index) -> Result<(), Error> {
    use crate::settings::{Embedder, HuggingFaceEmbedderSettings};
    log::warn!("You are executing the vector search test. This WILL take a while and might lead to timeouts in other tests. You can disable this testcase by not enabling the `experimental-vector-search`-feature and running the tests again.");
    // enable vector searching and configure an embedder
    let features = crate::features::ExperimentalFeatures::new(&client)
        .set_vector_store(true)
        .update()
        .await
        .expect("could not enable the vector store");
    assert!(features.vector_store);
    let embedder_setting = Embedder::HuggingFace(HuggingFaceEmbedderSettings {
        model: Some("BAAI/bge-base-en-v1.5".into()),
        revision: None,
        document_template: Some("{{ doc.value }}".into()),
    });
    let t3 = index
        .set_settings(&crate::settings::Settings {
            embedders: Some(HashMap::from([("default".to_string(), embedder_setting)])),
            ..crate::settings::Settings::default()
        })
        .await?;
    // Vector-search related tasks are slow; wait with the same generous 120s limit that
    // `setup_test_index` uses for this feature instead of relying on the default timeout.
    t3.wait_for_completion(&client, None, Some(std::time::Duration::from_secs(120)))
        .await?;

    setup_test_index(&client, &index).await?;

    // "zweite" = "second" in German
    // => an embedding should be able to detect that this is equivalent, but not the regular search
    // NOTE(review): the comment above talks about "zweite", yet this query sends "Facebook";
    // confirm which query string is intended.
    let results: SearchResults<Document> = index
        .search()
        .with_query("Facebook")
        .with_hybrid("default", 1.0) // entirely rely on semantic searching
        .execute()
        .await?;
    assert_eq!(results.hits.len(), 1);
    assert_eq!(
        &Document {
            id: 1,
            value: S("dolor sit amet, consectetur adipiscing elit"),
            kind: S("text"),
            number: 10,
            nested: Nested { child: S("second") },
        },
        &results.hits[0].result
    );
    let results: SearchResults<Document> = index
        .search()
        .with_query("zweite")
        .with_hybrid("default", 0.0) // no semantic searching => no matches
        .execute()
        .await?;
    assert_eq!(results.hits.len(), 0);

    // word that has a typo => would have been found via traditional means
    // if entirely relying on semantic searching, no result is found
    let results: SearchResults<Document> = index
        .search()
        .with_query("lohrem")
        .with_hybrid("default", 1.0)
        .execute()
        .await?;
    assert_eq!(results.hits.len(), 0);
    let results: SearchResults<Document> = index
        .search()
        .with_query("lohrem")
        .with_hybrid("default", 0.0)
        .execute()
        .await?;
    assert_eq!(results.hits.len(), 1);
    assert_eq!(
        &Document {
            id: 0,
            value: S("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."),
            kind: S("text"),
            number: 0,
            nested: Nested { child: S("first") }
        },
        &results.hits[0].result
    );

    Ok(())
}
11771306
}

0 commit comments

Comments
 (0)