TMP add something to print the estimated trie size

This commit is contained in:
Michael Davis
2025-07-09 22:27:54 -04:00
parent 623afc4f84
commit 75ab173caf
3 changed files with 33 additions and 0 deletions

View File

@@ -33,6 +33,9 @@ fn main() {
let dict = Dictionary::new(EN_US_AFF, EN_US_DIC).unwrap();
println!("Compiled the dictionary in {:?}", now.elapsed());
let (size, subtries) = dict.estimated_size();
println!("Estimated dictionary size: {size} (subtries: {subtries})");
let now = Instant::now();
if dict.check(&word) {
println!(

View File

@@ -43,6 +43,32 @@ impl<K, V, S> HashArrayMappedTrie<K, V, S> {
let root = TrieNodeIter::Entries(self.root.entries().iter());
Iter { stack: vec![root] }
}
/// Size in bytes of a single `(K, V)` item, as reported by `size_of`.
///
/// Used by `trie_size` to account for the items held in collision entries.
const ITEM_SIZE: usize = size_of::<(K, V)>();
/// Recursively estimates the memory footprint of `trie` and everything below it.
///
/// Returns `(bytes, subtries)`: the estimated size in bytes and the number of
/// nested subtries reachable from `trie` (`trie` itself is not counted).
fn trie_size(trie: &SparseArray<Entry<(K, V)>>) -> (usize, usize) {
    // Cost of this node on its own: one bitmap plus one `Entry` slot per
    // `bitmap().len()` (presumably the number of occupied slots - TODO confirm).
    let node_bytes = size_of::<Bitmap>() + size_of::<Entry<(K, V)>>() * trie.bitmap().len();

    trie.entries()
        .iter()
        .fold((node_bytes, 0), |(bytes, nested), entry| match entry {
            // A subtrie contributes its own recursive size, and counts as one
            // subtrie plus however many it contains.
            Entry::Subtrie(child) => {
                let (child_bytes, child_nested) = Self::trie_size(child);
                (bytes + child_bytes, nested + 1 + child_nested)
            }
            // Leaves add nothing beyond the `Entry` slot counted above
            // (presumably the item is stored inline - TODO confirm).
            Entry::Leaf { .. } => (bytes, nested),
            // Collision entries are charged one item size per colliding item.
            Entry::Collision { data } => (bytes + Self::ITEM_SIZE * data.len(), nested),
        })
}
/// Estimates how much memory this trie occupies.
///
/// Returns `(bytes, subtries)`: the size of the trie struct itself plus the
/// estimated size of the root node and everything beneath it, and the number
/// of subtries found below the root.
pub fn estimated_size(&self) -> (usize, usize) {
    // TODO: another interesting thing to track could be a histogram of
    // trie loads.
    let (nodes_bytes, subtrie_count) = Self::trie_size(&self.root);
    let total_bytes = size_of::<Self>() + nodes_bytes;
    (total_bytes, subtrie_count)
}
}
impl<K, V, S> HashArrayMappedTrie<K, V, S>

View File

@@ -138,6 +138,10 @@ impl Dictionary<DefaultHashBuilder> {
pub fn new(aff: &str, dic: &str) -> Result<Self, ParseDictionaryError> {
Self::new_with_hasher(aff, dic, DefaultHashBuilder::default())
}
/// Reports the estimated size of the dictionary's word storage.
///
/// Forwards to `estimated_size` on the `words` collection and returns its
/// result unchanged; the caller in `main` treats the pair as
/// `(size, subtries)`. Nothing besides `words` is included in the estimate.
pub fn estimated_size(&self) -> (usize, usize) {
self.words.estimated_size()
}
}
impl<S: BuildHasher + Clone> Dictionary<S> {