refactor: work on search

This commit is contained in:
Timo Kösters 2022-07-10 16:28:43 +02:00
parent a10e7e7263
commit ada1251a52
No known key found for this signature in database
GPG key ID: 356E705610F626D5
16 changed files with 87 additions and 57 deletions

View file

@ -1 +0,0 @@
asdf

View file

@ -0,0 +1,66 @@
impl service::room::search::Data for KeyValueDatabase {
/// Indexes the words of `message_body` so the PDU can be found by `search_pdus`.
///
/// The body is split on non-alphanumeric characters; empty words and words
/// longer than 50 bytes are skipped, and the rest are lowercased. For every
/// remaining word one entry with an empty value is written to `tokenids`,
/// keyed as:
///
/// `shortroomid (big-endian) | word | 0xff | pdu_id (big-endian)`
///
/// # Errors
///
/// Returns an error if looking up the short room id or writing the batch fails.
///
/// # Panics
///
/// Panics if no short room id exists for `room_id`.
fn index_pdu<'a>(&self, room_id: &RoomId, pdu_id: u64, message_body: String) -> Result<()> {
    // Same lookup `search_pdus` uses to build its key prefix.
    let shortroomid = self.get_shortroomid(room_id)?.expect("room exists");

    let mut batch = message_body
        .split_terminator(|c: char| !c.is_alphanumeric())
        .filter(|s| !s.is_empty())
        .filter(|word| word.len() <= 50)
        .map(str::to_lowercase)
        .map(|word| {
            let mut key = shortroomid.to_be_bytes().to_vec();
            key.extend_from_slice(word.as_bytes());
            key.push(0xff);
            // `search_pdus` reads the trailing `size_of::<u64>()` bytes of each key
            // as the id, so the id is stored as exactly eight big-endian bytes.
            // NOTE(review): confirm callers pass the numeric id and not the full
            // pdu id bytes.
            key.extend_from_slice(&pdu_id.to_be_bytes());
            (key, Vec::new())
        });

    self.tokenids.insert_batch(&mut batch)?;

    Ok(())
}
/// Searches the token index of `room_id` for PDUs matching `search_string`.
///
/// The search string is split on non-alphanumeric characters, empty pieces are
/// dropped and the rest are lowercased. For each word, the `tokenids` entries
/// whose key starts with `shortroomid | word | 0xff` are walked in reverse,
/// starting from the largest possible key with that prefix, and the trailing
/// `size_of::<u64>()` bytes of every key are taken as the id.
///
/// The per-word id streams are handed to `utils::common_elements`
/// (presumably yielding only ids present in every stream; verify against that
/// helper). Each resulting id is prefixed with the short room id bytes.
///
/// Returns the iterator of prefixed ids together with the lowercased words, or
/// `None` when `utils::common_elements` returns `None`.
///
/// # Panics
///
/// Panics if no short room id exists for `room_id`.
fn search_pdus<'a>(
    &'a self,
    room_id: &RoomId,
    search_string: &str,
) -> Result<Option<(impl Iterator<Item = Vec<u8>> + 'a, Vec<String>)>> {
    let shortroomid = self.get_shortroomid(room_id)?.expect("room exists");
    let room_prefix = shortroomid.to_be_bytes().to_vec();
    // Second copy: one is moved into the per-word closure, one into the result.
    let pduid_prefix = room_prefix.clone();

    let words: Vec<_> = search_string
        .split_terminator(|c: char| !c.is_alphanumeric())
        .filter(|piece| !piece.is_empty())
        .map(str::to_lowercase)
        .collect();

    let per_word_ids = words.clone().into_iter().map(move |word| {
        // shortroomid | word | 0xff
        let token_prefix: Vec<u8> = room_prefix
            .iter()
            .copied()
            .chain(word.bytes())
            .chain(std::iter::once(0xff))
            .collect();

        // Largest key that can exist under this prefix.
        let mut seek_key = token_prefix.clone();
        seek_key.extend_from_slice(&u64::MAX.to_be_bytes());

        self.tokenids
            .iter_from(&seek_key, true) // Newest pdus first
            .take_while(move |(k, _)| k.starts_with(&token_prefix))
            .map(|(key, _)| {
                let id_start = key.len() - size_of::<u64>();
                key[id_start..].to_vec()
            })
    });

    // The streams are iterated in reverse, so the comparison is flipped (b vs a).
    let matches = utils::common_elements(per_word_ids, |a, b| b.cmp(a));

    Ok(matches.map(|ids| {
        let pdu_ids = ids.map(move |id| {
            let mut pduid = pduid_prefix.clone();
            pduid.extend_from_slice(&id);
            pduid
        });
        (pdu_ids, words)
    }))
}

View file

@ -0,0 +1,9 @@
/// Storage operations required by the room search service.
///
/// Trait items take the visibility of the trait itself, so the methods carry
/// no `pub` qualifier of their own.
pub trait Data {
    /// Records `message_body` for the PDU `pdu_id` in `room_id` so it can be
    /// found by [`Data::search_pdus`].
    fn index_pdu<'a>(&self, room_id: &RoomId, pdu_id: u64, message_body: String) -> Result<()>;

    /// Searches `room_id` for `search_string`.
    ///
    /// On success returns an optional pair: an iterator of byte vectors and a
    /// list of strings. In the key-value implementation these are the matching
    /// PDU ids and the lowercased search words.
    fn search_pdus<'a>(
        &'a self,
        room_id: &RoomId,
        search_string: &str,
    ) -> Result<Option<(impl Iterator<Item = Vec<u8>> + 'a, Vec<String>)>>;
}

View file

@ -1,50 +1,19 @@
mod data;
pub use data::Data;
use crate::service::*;
/// Room search service, generic over the storage backend `D` that implements [`Data`].
pub struct Service<D: Data> {
/// Storage backend that search requests are forwarded to.
db: D,
}
impl Service<_> {
#[tracing::instrument(skip(self))]
pub fn search_pdus<'a>(
&'a self,
room_id: &RoomId,
search_string: &str,
) -> Result<Option<(impl Iterator<Item = Vec<u8>> + 'a, Vec<String>)>> {
let prefix = self
.get_shortroomid(room_id)?
.expect("room exists")
.to_be_bytes()
.to_vec();
let prefix_clone = prefix.clone();
let words: Vec<_> = search_string
.split_terminator(|c: char| !c.is_alphanumeric())
.filter(|s| !s.is_empty())
.map(str::to_lowercase)
.collect();
let iterators = words.clone().into_iter().map(move |word| {
let mut prefix2 = prefix.clone();
prefix2.extend_from_slice(word.as_bytes());
prefix2.push(0xff);
let mut last_possible_id = prefix2.clone();
last_possible_id.extend_from_slice(&u64::MAX.to_be_bytes());
self.tokenids
.iter_from(&last_possible_id, true) // Newest pdus first
.take_while(move |(k, _)| k.starts_with(&prefix2))
.map(|(key, _)| key[key.len() - size_of::<u64>()..].to_vec())
});
Ok(utils::common_elements(iterators, |a, b| {
// We compare b with a because we reversed the iterator earlier
b.cmp(a)
})
.map(|iter| {
(
iter.map(move |id| {
let mut pduid = prefix_clone.clone();
pduid.extend_from_slice(&id);
pduid
}),
words,
)
}))
self.db.search_pdus(room_id, search_string)
}
}

View file

@ -439,20 +439,7 @@
.map_err(|_| Error::bad_database("Invalid content in pdu."))?;
if let Some(body) = content.body {
let mut batch = body
.split_terminator(|c: char| !c.is_alphanumeric())
.filter(|s| !s.is_empty())
.filter(|word| word.len() <= 50)
.map(str::to_lowercase)
.map(|word| {
let mut key = shortroomid.to_be_bytes().to_vec();
key.extend_from_slice(word.as_bytes());
key.push(0xff);
key.extend_from_slice(&pdu_id);
(key, Vec::new())
});
self.tokenids.insert_batch(&mut batch)?;
DB.rooms.search.index_pdu(room_id, pdu_id, body)?;
let admin_room = self.id_from_alias(
<&RoomAliasId>::try_from(