1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

(index) Permit fast termination of rejection filter execution

This commit is contained in:
Viktor Lofgren
2025-08-09 23:36:59 +02:00
parent 9a65946e22
commit c5b5b0c699
8 changed files with 51 additions and 14 deletions

View File

@@ -140,13 +140,13 @@ public class FullReverseIndexReader {
}
/** Create a filter step requiring the specified termId to be absent from the documents */
public QueryFilterStepIf not(long termId) {
public QueryFilterStepIf not(long termId, IndexSearchBudget budget) {
long offset = wordOffset(termId);
if (offset < 0) // No documents
return new QueryFilterLetThrough();
return new ReverseIndexRejectFilter(getReader(offset));
return new ReverseIndexRejectFilter(getReader(offset), budget);
}
/** Return the number of documents with the termId in the index */

View File

@@ -4,11 +4,12 @@ import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.skiplist.SkipListReader;
public record ReverseIndexRejectFilter(SkipListReader range) implements QueryFilterStepIf {
public record ReverseIndexRejectFilter(SkipListReader range, IndexSearchBudget budget) implements QueryFilterStepIf {
@Override
public void apply(LongQueryBuffer buffer) {
range.rejectData(buffer);
while (budget.hasTimeLeft() && range.tryRejectData(buffer));
buffer.finalizeFiltering();
}

View File

@@ -8,11 +8,8 @@ public record ReverseIndexRetainFilter(SkipListReader range, String name, long w
@Override
public void apply(LongQueryBuffer buffer) {
while (budget.hasTimeLeft()) {
if (!range.tryRetainData(buffer)) {
return;
}
}
while (budget.hasTimeLeft() && range.tryRetainData(buffer));
buffer.finalizeFiltering();
}

View File

@@ -155,7 +155,7 @@ public class CombinedIndexReader {
}
for (long term : terms.excludes()) {
query = query.not(term);
query = query.not(term, budget);
}
// Run these filter steps last, as they'll worst-case cause as many page faults as there are

View File

@@ -40,9 +40,9 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
return this;
}
public IndexQueryBuilder not(long termId) {
public IndexQueryBuilder not(long termId, IndexSearchBudget budget) {
query.addInclusionFilter(reverseIndexFullReader.not(termId));
query.addInclusionFilter(reverseIndexFullReader.not(termId, budget));
return this;
}

View File

@@ -13,7 +13,7 @@ public interface IndexQueryBuilder {
/** Excludes documents that contain termId, within the full index
*/
IndexQueryBuilder not(long termId);
IndexQueryBuilder not(long termId, IndexSearchBudget budget);
IndexQueryBuilder addInclusionFilter(QueryFilterStepIf filterStep);

View File

@@ -200,7 +200,7 @@ public class CombinedIndexReaderTest {
var reader = indexFactory.getCombinedIndexReader();
var query = reader.findFullWord(kw("hello"))
.also(kw("world"), new IndexSearchBudget(10_000))
.not(kw("goodbye"))
.not(kw("goodbye"), new IndexSearchBudget(10_000))
.build();
var buffer = new LongQueryBuffer(32);

View File

@@ -307,6 +307,45 @@ public class SkipListReader {
return currentBlockIdx >= n;
}
/**
 * Performs a single rejection step against the block at {@code currentBlock},
 * then reports whether the caller should invoke this method again.
 * <p>
 * Unlike a run-to-completion loop, this method touches one page per call,
 * which lets the caller interleave other checks (for instance a time budget)
 * between steps.
 *
 * @param data the query buffer to filter; it is advanced via {@code rejectInPage}
 *             and, at the end block, via {@code retainAndAdvance()}
 * @return {@code false} once the end block has been processed; otherwise
 *         {@code data.hasMore()}, i.e. whether unprocessed values remain in the buffer
 */
public boolean tryRejectData(@NotNull LongQueryBuffer data) {
try (var page = pool.get(currentBlock)) {
// Read the header fields of the block at the current offset within the page
int n = headerNumRecords(page, currentBlockOffset);
int fc = headerForwardCount(page, currentBlockOffset);
int flags = headerFlags(page, currentBlockOffset);
int dataOffset = SkipListConstants.pageDataOffset(currentBlockOffset, fc);
// NOTE(review): rejectInPage is not visible here; presumably it returns true when
// this block's records are exhausted and the reader must move on -- confirm
if (rejectInPage(page, dataOffset, n, data)) {
atEnd = (flags & SkipListConstants.FLAG_END_BLOCK) != 0;
if (atEnd) {
// No further blocks can reject anything, so every value still in the
// buffer is retained and the caller is told to stop.
while (data.hasMore())
data.retainAndAdvance();
return false;
}
if (!data.hasMore()) {
// Buffer is drained; step to the immediately following block.
// NOTE(review): unlike the branch below, currentBlockOffset and currentBlockIdx
// are not reset here -- confirm this is intentional
currentBlock += SkipListConstants.BLOCK_SIZE;
}
else {
// Values remain in the buffer: use the forward pointers stored after the header
// to choose the next block to read, defaulting to the immediately following block.
long nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE;
long currentValue = data.currentValue();
for (int i = 0; i < fc; i++) {
// Forward pointer entries are 8-byte longs laid out directly after the header
long blockMaxValue = page.getLong(currentBlockOffset + SkipListConstants.HEADER_SIZE + 8 * i);
// The candidate target is derived from the pointer *before* index i (clamped at 0).
// NOTE(review): presumably a deliberately conservative skip so the block that may
// contain currentValue is never jumped over -- verify against skipOffsetForPointer
nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE * SkipListConstants.skipOffsetForPointer(Math.max(0, i-1));
// Stop scanning at the first pointer whose stored value is not below the buffer's current value
if (blockMaxValue >= currentValue) {
break;
}
}
// Reposition the reader at the start of the chosen block
currentBlockOffset = 0;
currentBlockIdx = 0;
currentBlock = nextBlock;
}
}
}
// More work remains only if the buffer still holds unprocessed values
return data.hasMore();
}
public void rejectData(@NotNull LongQueryBuffer data) {
while (data.hasMore()) {
try (var page = pool.get(currentBlock)) {