This commit is contained in:
tezlm 2023-10-14 07:59:46 -07:00
parent 3bfdae795d
commit a695b18882
Signed by: tezlm
GPG key ID: 649733FCD94AFBBA
9 changed files with 520 additions and 88 deletions

435
Cargo.lock generated
View file

@ -218,9 +218,18 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.3.2"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dbe3c979c178231552ecba20214a8272df4e09f232a87aef4320cf06539aded"
checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635"
[[package]]
name = "bitpacking"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7"
dependencies = [
"crunchy",
]
[[package]]
name = "blake2b_simd"
@ -286,6 +295,12 @@ dependencies = [
"jobserver",
]
[[package]]
name = "census"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fafee10a5dd1cffcb5cc560e0d0df8803d7355a2b12272e3557dee57314cb6e"
[[package]]
name = "cexpr"
version = "0.6.0"
@ -399,6 +414,7 @@ dependencies = [
"serde_json",
"serde_yaml",
"sha-1",
"tantivy",
"thiserror",
"thread_local",
"threadpool",
@ -554,6 +570,12 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "crypto-common"
version = "0.1.6"
@ -652,6 +674,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "downcast-rs"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650"
[[package]]
name = "ed25519"
version = "2.2.2"
@ -709,6 +737,16 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1"
[[package]]
name = "errno"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860"
dependencies = [
"libc",
"windows-sys 0.48.0",
]
[[package]]
name = "fallible-iterator"
version = "0.2.0"
@ -721,6 +759,18 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "fastdivide"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04"
[[package]]
name = "fastrand"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
[[package]]
name = "fdeflate"
version = "0.3.0"
@ -785,6 +835,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "fs4"
version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2eeb4ed9e12f43b7fa0baae3f9cdda28352770132ef2e09a23760c29cae8bd47"
dependencies = [
"rustix",
"windows-sys 0.48.0",
]
[[package]]
name = "futures"
version = "0.3.28"
@ -874,6 +934,19 @@ dependencies = [
"slab",
]
[[package]]
name = "generator"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e"
dependencies = [
"cc",
"libc",
"log",
"rustversion",
"windows",
]
[[package]]
name = "generic-array"
version = "0.14.7"
@ -1051,6 +1124,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "htmlescape"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
[[package]]
name = "http"
version = "0.2.9"
@ -1192,6 +1271,18 @@ version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
"js-sys",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "integer-encoding"
version = "3.0.4"
@ -1319,6 +1410,12 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "levenshtein_automata"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25"
[[package]]
name = "libc"
version = "0.2.146"
@ -1379,6 +1476,12 @@ version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "linux-raw-sys"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f"
[[package]]
name = "lmdb-rkv-sys"
version = "0.11.2"
@ -1406,6 +1509,29 @@ version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
[[package]]
name = "loom"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5"
dependencies = [
"cfg-if",
"generator",
"pin-utils",
"scoped-tls",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "lru"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a83fb7698b3643a0e34f9ae6f2e8f0178c0fd42f8b59d493aa271ff3a5bf21"
dependencies = [
"hashbrown 0.14.0",
]
[[package]]
name = "lru-cache"
version = "0.1.2"
@ -1425,6 +1551,12 @@ dependencies = [
"libc",
]
[[package]]
name = "lz4_flex"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8"
[[package]]
name = "maplit"
version = "1.0.2"
@ -1458,12 +1590,31 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40"
[[package]]
name = "measure_time"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56220900f1a0923789ecd6bf25fbae8af3b2f1ff3e9e297fc9b6b8674dd4d852"
dependencies = [
"instant",
"log",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memmap2"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
dependencies = [
"libc",
]
[[package]]
name = "memoffset"
version = "0.7.1"
@ -1515,6 +1666,12 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "murmurhash32"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9380db4c04d219ac5c51d14996bbf2c2e9a15229771b53f8671eb6c83cf44df"
[[package]]
name = "nix"
version = "0.26.2"
@ -1606,6 +1763,15 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "oneshot"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f6640c6bda7731b1fdbab747981a0f896dd1fedaf9f4a53fa237a04a84431f4"
dependencies = [
"loom",
]
[[package]]
name = "openssl-probe"
version = "0.1.5"
@ -1701,6 +1867,15 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "ownedbytes"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e8a72b918ae8198abb3a18c190288123e1d442b6b9a7d709305fd194688b4b7"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "page_size"
version = "0.4.2"
@ -1960,6 +2135,26 @@ dependencies = [
"getrandom",
]
[[package]]
name = "rayon"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.2.16"
@ -2288,7 +2483,7 @@ version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2"
dependencies = [
"bitflags 2.3.2",
"bitflags 2.4.0",
"fallible-iterator",
"fallible-streaming-iterator",
"hashlink",
@ -2308,6 +2503,16 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "rust-stemmers"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54"
dependencies = [
"serde",
"serde_derive",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
@ -2323,6 +2528,19 @@ dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "0.38.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4"
dependencies = [
"bitflags 2.4.0",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.48.0",
]
[[package]]
name = "rustls"
version = "0.20.8"
@ -2408,6 +2626,12 @@ dependencies = [
"windows-sys 0.42.0",
]
[[package]]
name = "scoped-tls"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
[[package]]
name = "scopeguard"
version = "1.1.0"
@ -2627,6 +2851,15 @@ dependencies = [
"time",
]
[[package]]
name = "sketches-ddsketch"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68a406c1882ed7f29cd5e248c9848a80e7cb6ae0fea82346d2746f2f941c07e1"
dependencies = [
"serde",
]
[[package]]
name = "slab"
version = "0.4.8"
@ -2678,6 +2911,12 @@ dependencies = [
"der",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
@ -2736,6 +2975,159 @@ dependencies = [
"crossbeam-queue",
]
[[package]]
name = "tantivy"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1d4675fed6fe2218ce11445374e181e864a8ffd0f28e7e0591ccfc38cd000ae"
dependencies = [
"aho-corasick",
"arc-swap",
"async-trait",
"base64 0.21.2",
"bitpacking",
"byteorder",
"census",
"crc32fast",
"crossbeam-channel",
"downcast-rs",
"fastdivide",
"fs4",
"htmlescape",
"itertools",
"levenshtein_automata",
"log",
"lru",
"lz4_flex",
"measure_time",
"memmap2",
"murmurhash32",
"num_cpus",
"once_cell",
"oneshot",
"rayon",
"regex",
"rust-stemmers",
"rustc-hash",
"serde",
"serde_json",
"sketches-ddsketch",
"smallvec",
"tantivy-bitpacker",
"tantivy-columnar",
"tantivy-common",
"tantivy-fst",
"tantivy-query-grammar",
"tantivy-stacker",
"tantivy-tokenizer-api",
"tempfile",
"thiserror",
"time",
"uuid",
"winapi",
]
[[package]]
name = "tantivy-bitpacker"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cecb164321482301f514dd582264fa67f70da2d7eb01872ccd71e35e0d96655a"
dependencies = [
"bitpacking",
]
[[package]]
name = "tantivy-columnar"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d85f8019af9a78b3118c11298b36ffd21c2314bd76bbcd9d12e00124cbb7e70"
dependencies = [
"fastdivide",
"fnv",
"itertools",
"serde",
"tantivy-bitpacker",
"tantivy-common",
"tantivy-sstable",
"tantivy-stacker",
]
[[package]]
name = "tantivy-common"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af4a3a975e604a2aba6b1106a04505e1e7a025e6def477fab6e410b4126471e1"
dependencies = [
"async-trait",
"byteorder",
"ownedbytes",
"serde",
"time",
]
[[package]]
name = "tantivy-fst"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc3c506b1a8443a3a65352df6382a1fb6a7afe1a02e871cee0d25e2c3d5f3944"
dependencies = [
"byteorder",
"regex-syntax 0.6.29",
"utf8-ranges",
]
[[package]]
name = "tantivy-query-grammar"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d39c5a03100ac10c96e0c8b07538e2ab8b17da56434ab348309b31f23fada77"
dependencies = [
"nom",
]
[[package]]
name = "tantivy-sstable"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0c1bb43e5e8b8e05eb8009610344dbf285f06066c844032fbb3e546b3c71df"
dependencies = [
"tantivy-common",
"tantivy-fst",
"zstd",
]
[[package]]
name = "tantivy-stacker"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2c078595413f13f218cf6f97b23dcfd48936838f1d3d13a1016e05acd64ed6c"
dependencies = [
"murmurhash32",
"tantivy-common",
]
[[package]]
name = "tantivy-tokenizer-api"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "347b6fb212b26d3505d224f438e3c4b827ab8bd847fe9953ad5ac6b8f9443b66"
dependencies = [
"serde",
]
[[package]]
name = "tempfile"
version = "3.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef"
dependencies = [
"cfg-if",
"fastrand",
"redox_syscall 0.3.5",
"rustix",
"windows-sys 0.48.0",
]
[[package]]
name = "thiserror"
version = "1.0.40"
@ -3003,7 +3395,7 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8bd22a874a2d0b70452d5597b12c537331d49060824a95f49f108994f94aa4c"
dependencies = [
"bitflags 2.3.2",
"bitflags 2.4.0",
"bytes",
"futures-core",
"futures-util",
@ -3239,6 +3631,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "utf8-ranges"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba"
[[package]]
name = "uuid"
version = "1.3.4"
@ -3246,6 +3644,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa2982af2eec27de306107c027578ff7f423d65f7250e40ce0fea8f45248b81"
dependencies = [
"getrandom",
"serde",
]
[[package]]
@ -3407,6 +3806,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.42.0"
@ -3579,6 +3987,25 @@ dependencies = [
"num-traits",
]
[[package]]
name = "zstd"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "6.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581"
dependencies = [
"libc",
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.8+zstd.1.5.5"

View file

@ -103,6 +103,7 @@ lazy_static = "1.4.0"
async-trait = "0.1.68"
sd-notify = { version = "0.4.1", optional = true }
tantivy = "0.21.0"
[target.'cfg(unix)'.dependencies]
nix = { version = "0.26.2", features = ["resource"] }

View file

@ -1,67 +0,0 @@
use ruma::RoomId;
use crate::{database::KeyValueDatabase, service, services, utils, Result};
impl service::rooms::search::Data for KeyValueDatabase {
    /// Writes one `tokenids` entry for every word found in `message_body`.
    ///
    /// Words are the runs between non-alphanumeric characters. Empty runs and runs longer
    /// than 50 bytes are skipped; the remaining ones are lowercased. Each key is laid out
    /// as `shortroomid (big-endian) ++ word ++ 0xff ++ pdu_id` and stored with an empty
    /// value.
    fn index_pdu<'a>(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> {
        let room_prefix = shortroomid.to_be_bytes();

        let mut entries = message_body
            .split_terminator(|c: char| !c.is_alphanumeric())
            .filter(|token| !token.is_empty() && token.len() <= 50)
            .map(|token| {
                let lowered = token.to_lowercase();
                let mut key = room_prefix.to_vec();
                key.extend_from_slice(lowered.as_bytes());
                key.push(0xff);
                // TODO: currently we save the room id a second time here
                key.extend_from_slice(pdu_id);
                (key, Vec::new())
            });

        self.tokenids.insert_batch(&mut entries)
    }

    /// Finds the entries of `room_id` that are present for every word of `search_string`.
    ///
    /// The search string is split and lowercased the same way as in `index_pdu` (without
    /// the length limit). Returns `Ok(None)` when `utils::common_elements` returns `None`;
    /// otherwise returns the shared key suffixes (the bytes following
    /// `shortroomid ++ word ++ 0xff`) together with the lowercased search words.
    fn search_pdus<'a>(
        &'a self,
        room_id: &RoomId,
        search_string: &str,
    ) -> Result<Option<(Box<dyn Iterator<Item = Vec<u8>> + 'a>, Vec<String>)>> {
        let shortroomid = services()
            .rooms
            .short
            .get_shortroomid(room_id)?
            .expect("room exists");
        let room_prefix = shortroomid.to_be_bytes().to_vec();

        let words = search_string
            .split_terminator(|c: char| !c.is_alphanumeric())
            .filter(|piece| !piece.is_empty())
            .map(|piece| piece.to_lowercase())
            .collect::<Vec<String>>();

        let per_word = words.clone().into_iter().map(move |word| {
            // Every key for this word in this room starts with `shortroomid ++ word ++ 0xff`.
            let mut word_prefix = room_prefix.clone();
            word_prefix.extend_from_slice(word.as_bytes());
            word_prefix.push(0xff);
            let suffix_start = word_prefix.len();

            // Seek key: the prefix followed by eight 0xff bytes.
            let mut seek_key = word_prefix.clone();
            seek_key.extend_from_slice(&u64::MAX.to_be_bytes());

            self.tokenids
                .iter_from(&seek_key, true) // Newest pdus first
                .take_while(move |(key, _)| key.starts_with(&word_prefix))
                .map(move |(key, _)| key[suffix_start..].to_vec())
        });

        // We compare b with a because we reversed the iterator earlier
        match utils::common_elements(per_word, |a, b| b.cmp(a)) {
            None => Ok(None),
            Some(matches) => Ok(Some((Box::new(matches), words))),
        }
    }
}

View file

@ -24,7 +24,7 @@ use std::{
fs::{self, remove_dir_all},
io::Write,
mem::size_of,
path::Path,
path::{Path, PathBuf},
sync::{Arc, Mutex, RwLock},
time::Duration,
};
@ -174,6 +174,63 @@ pub struct KeyValueDatabase {
pub(super) lasttimelinecount_cache: Mutex<HashMap<OwnedRoomId, PduCount>>,
}
/// Reader and writer handles for a tantivy index.
///
/// Both handles are obtained from the same index in `TantivyDatabase::open_or_create`.
pub struct TantivyDatabase {
/// Read handle; `search_pdus` gets its searchers from it.
reader: tantivy::IndexReader,
/// Write handle; used to add documents, delete terms and reach the underlying index.
writer: tantivy::IndexWriter,
}
impl TantivyDatabase {
    /// Opens the tantivy index stored in the directory `path`, creating the directory and
    /// the index when they do not exist yet.
    ///
    /// The index schema consists of five fields:
    /// `short_room_id` (u64, stored), `origin_server_ts` (u64, fast field),
    /// `event_id` (untokenized string, stored), `type` (untokenized string) and
    /// `body` (tokenized text).
    ///
    /// # Panics
    ///
    /// Panics if the directory cannot be created or opened, if the index cannot be opened
    /// or created with this schema, or if the reader or writer cannot be constructed.
    pub fn open_or_create(path: &Path) -> Self {
        use tantivy::schema::*;

        /// Memory budget handed to the index writer (16 MiB).
        const WRITER_HEAP_BYTES: usize = 16 * 1024 * 1024;

        let mut schema = SchemaBuilder::new();
        // NOTE(review): `short_room_id` is only STORED; restricting a search to one room
        // presumably needs it to be INDEXED as well — confirm before implementing search.
        schema.add_u64_field("short_room_id", STORED);
        schema.add_u64_field("origin_server_ts", FAST);
        schema.add_text_field("event_id", STRING | STORED);
        schema.add_text_field("type", STRING);
        schema.add_text_field("body", TEXT);

        // `MmapDirectory::open` refuses a path that does not exist, so make sure the
        // directory is there before opening it (this is what a fresh install hits).
        std::fs::create_dir_all(path).expect("failed to create the tantivy index directory");
        let directory = tantivy::directory::MmapDirectory::open(path)
            .expect("failed to open the tantivy index directory");

        let index = tantivy::IndexBuilder::new()
            .schema(schema.build())
            .open_or_create(directory)
            .expect("failed to open or create the tantivy index");

        Self {
            reader: index
                .reader()
                .expect("failed to create the tantivy index reader"),
            writer: index
                .writer(WRITER_HEAP_BYTES)
                .expect("failed to create the tantivy index writer"),
        }
    }
}
// Search backend on top of the tantivy index held by `TantivyDatabase`.
//
// NOTE(review): work in progress — every method below still contains a `todo!()`,
// which panics as soon as it is evaluated.
impl crate::service::rooms::search::Data for TantivyDatabase {
/// Intended to add `pdu` to the index.
///
/// The document to add is still a `todo!()`, so the call currently panics while
/// evaluating the argument; `pdu` itself is not read yet.
fn index_pdu(&self, pdu: &PduEvent) -> Result<()> {
self.writer.add_document(todo!()).unwrap();
// NOTE(review): the commit is disabled; presumably added documents stay invisible to
// readers until a commit happens — confirm against the tantivy writer docs.
// self.writer.commit().unwrap();
Ok(())
}
/// Intended to remove the document belonging to event `id` from the index.
///
/// The term to delete is still a `todo!()`; `id` is not read yet.
fn deindex_pdu(&self, id: &EventId) -> Result<()> {
self.writer.delete_term(todo!());
// NOTE(review): commit disabled here as well — see `index_pdu`.
// self.writer.commit().unwrap();
Ok(())
}
/// Runs `query` against the index and maps the hits to an iterator.
///
/// NOTE(review): `room_id` is not used anywhere in the body, so nothing here limits the
/// hits to that room.
/// NOTE(review): the signature declares an `(iterator, Vec<String>)` pair, but only a
/// boxed iterator is returned.
fn search_pdus<'a>(
&'a self,
room_id: &RoomId,
query: &str,
) -> Result<Option<(Box<dyn Iterator<Item = Vec<u8>> + 'a>, Vec<String>)>> {
// The parser is built with an empty list of default fields.
let parser = tantivy::query::QueryParser::for_index(self.writer.index(), vec![]);
// Only the first element of the lenient parse result is kept; the second is dropped.
let q = parser.parse_query_lenient(query).0;
// At most 100 hits, starting at offset 0, ordered by `origin_server_ts` descending.
let collector = tantivy::collector::TopDocs::with_limit(100)
.and_offset(0)
.order_by_fast_field("origin_server_ts", tantivy::Order::Desc);
let results = self.reader.searcher().search(&q, &collector).unwrap();
// A new searcher is requested for every hit; the field to read is still `todo!("search")`,
// which is only evaluated once the iterator is consumed.
let iter = results.into_iter().map(|item| self.reader.searcher().doc(item.1.doc_id).unwrap().get_first(todo!("search")));
Ok(Some(Box::new(iter)))
}
}
impl KeyValueDatabase {
/// Tries to remove the old database but ignores all errors.
pub fn try_remove(server_name: &str) -> Result<()> {
@ -401,7 +458,12 @@ impl KeyValueDatabase {
let db = Box::leak(db_raw);
let services_raw = Box::new(Services::build(db, config)?);
let mut tantivy_path = PathBuf::new();
tantivy_path.push(config.database_path.clone());
tantivy_path.push("tantivy");
let search = Box::leak(Box::new(TantivyDatabase::open_or_create(&tantivy_path)));
let services_raw = Box::new(Services::build(db, search, config)?);
// This is the first and only time we initialize the SERVICE static
*SERVICES.write().unwrap() = Some(Box::leak(services_raw));

View file

@ -50,8 +50,11 @@ impl Services {
+ media::Data
+ sending::Data
+ 'static,
S: rooms::search::Data
+ 'static,
>(
db: &'static D,
search: &'static S,
config: Config,
) -> Result<Self> {
Ok(Self {
@ -74,7 +77,7 @@ impl Services {
metadata: rooms::metadata::Service { db },
outlier: rooms::outlier::Service { db },
pdu_metadata: rooms::pdu_metadata::Service { db },
search: rooms::search::Service { db },
search: rooms::search::Service { search },
short: rooms::short::Service { db },
state: rooms::state::Service { db },
state_accessor: rooms::state_accessor::Service {

View file

@ -27,7 +27,6 @@ pub trait Data:
+ metadata::Data
+ outlier::Data
+ pdu_metadata::Data
+ search::Data
+ short::Data
+ state::Data
+ state_accessor::Data

View file

@ -1,12 +1,14 @@
use crate::Result;
use ruma::RoomId;
use crate::{Result, PduEvent};
use ruma::{RoomId, EventId};
/// Storage interface used by the room search service.
///
/// NOTE(review): this listing is a rendered diff, so two `index_pdu` signatures and two
/// string parameters of `search_pdus` appear side by side; the first of each pair appears
/// to be the removed form and the second the added one.
pub trait Data: Send + Sync {
// Takes the short room id, the pdu id and the message body as separate arguments.
fn index_pdu(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()>;
/// Adds `pdu` to the search index.
fn index_pdu(&self, pdu: &PduEvent) -> Result<()>;
/// Removes the entry for event `id` from the search index.
fn deindex_pdu(&self, id: &EventId) -> Result<()>;
/// Searches `room_id` with the given search string.
///
/// On success the optional result is a pair of an iterator over byte vectors and a
/// list of strings.
fn search_pdus<'a>(
&'a self,
room_id: &RoomId,
search_string: &str,
query: &str,
) -> Result<Option<(Box<dyn Iterator<Item = Vec<u8>> + 'a>, Vec<String>)>>;
}

View file

@ -2,25 +2,30 @@ mod data;
pub use data::Data;
use crate::Result;
use ruma::RoomId;
use crate::{Result, PduEvent};
use ruma::{RoomId, EventId};
/// Room search service holding a reference to its `Data` backend.
///
/// NOTE(review): this listing is a rendered diff; `db` appears to be the removed field
/// name and `search` the added one.
pub struct Service {
pub db: &'static dyn Data,
pub search: &'static dyn Data,
}
// Thin forwarding layer: each method passes its arguments straight to the backend.
//
// NOTE(review): this listing is a rendered diff, so removed and added lines are
// interleaved inside `index_pdu` and `search_pdus`.
impl Service {
/// Indexes a pdu by delegating to the backend's `index_pdu`.
#[tracing::instrument(skip(self))]
pub fn index_pdu<'a>(&self, shortroomid: u64, pdu_id: &[u8], message_body: &str) -> Result<()> {
self.db.index_pdu(shortroomid, pdu_id, message_body)
// Variant that takes the whole pdu and forwards it to `self.search`.
pub fn index_pdu<'a>(&self, pdu: &PduEvent) -> Result<()> {
self.search.index_pdu(pdu)
}
/// Removes an event from the index by delegating to the backend's `deindex_pdu`.
#[tracing::instrument(skip(self))]
pub fn deindex_pdu<'a>(&self, pdu: &EventId) -> Result<()> {
self.search.deindex_pdu(pdu)
}
/// Searches `room_id` by delegating to the backend's `search_pdus` and returns its
/// result unchanged.
#[tracing::instrument(skip(self))]
pub fn search_pdus<'a>(
&'a self,
room_id: &RoomId,
search_string: &str,
query: &str,
) -> Result<Option<(impl Iterator<Item = Vec<u8>> + 'a, Vec<String>)>> {
self.db.search_pdus(room_id, search_string)
self.search.search_pdus(room_id, query)
}
}

View file

@ -445,7 +445,7 @@ impl Service {
services()
.rooms
.search
.index_pdu(shortroomid, &pdu_id, &body)?;
.index_pdu(pdu)?;
let admin_room = services().rooms.alias.resolve_local_alias(
<&RoomAliasId>::try_from(
@ -1189,7 +1189,7 @@ impl Service {
services()
.rooms
.search
.index_pdu(shortroomid, &pdu_id, &body)?;
.index_pdu(&pdu)?;
}
}
_ => {}