mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
(index) Permit fast termination of rejection filter execution
This commit is contained in:
@@ -140,13 +140,13 @@ public class FullReverseIndexReader {
|
||||
}
|
||||
|
||||
/** Create a filter step requiring the specified termId to be absent from the documents */
|
||||
public QueryFilterStepIf not(long termId) {
|
||||
public QueryFilterStepIf not(long termId, IndexSearchBudget budget) {
|
||||
long offset = wordOffset(termId);
|
||||
|
||||
if (offset < 0) // No documents
|
||||
return new QueryFilterLetThrough();
|
||||
|
||||
return new ReverseIndexRejectFilter(getReader(offset));
|
||||
return new ReverseIndexRejectFilter(getReader(offset), budget);
|
||||
}
|
||||
|
||||
/** Return the number of documents with the termId in the index */
|
||||
|
@@ -4,11 +4,12 @@ import nu.marginalia.array.page.LongQueryBuffer;
|
||||
import nu.marginalia.index.query.filter.QueryFilterStepIf;
|
||||
import nu.marginalia.skiplist.SkipListReader;
|
||||
|
||||
public record ReverseIndexRejectFilter(SkipListReader range) implements QueryFilterStepIf {
|
||||
public record ReverseIndexRejectFilter(SkipListReader range, IndexSearchBudget budget) implements QueryFilterStepIf {
|
||||
|
||||
@Override
|
||||
public void apply(LongQueryBuffer buffer) {
|
||||
range.rejectData(buffer);
|
||||
while (budget.hasTimeLeft() && range.tryRejectData(buffer));
|
||||
|
||||
buffer.finalizeFiltering();
|
||||
}
|
||||
|
||||
|
@@ -8,11 +8,8 @@ public record ReverseIndexRetainFilter(SkipListReader range, String name, long w
|
||||
|
||||
@Override
|
||||
public void apply(LongQueryBuffer buffer) {
|
||||
while (budget.hasTimeLeft()) {
|
||||
if (!range.tryRetainData(buffer)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
while (budget.hasTimeLeft() && range.tryRetainData(buffer));
|
||||
|
||||
buffer.finalizeFiltering();
|
||||
}
|
||||
|
||||
|
@@ -155,7 +155,7 @@ public class CombinedIndexReader {
|
||||
}
|
||||
|
||||
for (long term : terms.excludes()) {
|
||||
query = query.not(term);
|
||||
query = query.not(term, budget);
|
||||
}
|
||||
|
||||
// Run these filter steps last, as they'll worst-case cause as many page faults as there are
|
||||
|
@@ -40,9 +40,9 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
|
||||
return this;
|
||||
}
|
||||
|
||||
public IndexQueryBuilder not(long termId) {
|
||||
public IndexQueryBuilder not(long termId, IndexSearchBudget budget) {
|
||||
|
||||
query.addInclusionFilter(reverseIndexFullReader.not(termId));
|
||||
query.addInclusionFilter(reverseIndexFullReader.not(termId, budget));
|
||||
|
||||
return this;
|
||||
}
|
||||
|
@@ -13,7 +13,7 @@ public interface IndexQueryBuilder {
|
||||
|
||||
/** Excludes documents that contain termId, within the full index
|
||||
*/
|
||||
IndexQueryBuilder not(long termId);
|
||||
IndexQueryBuilder not(long termId, IndexSearchBudget budget);
|
||||
|
||||
IndexQueryBuilder addInclusionFilter(QueryFilterStepIf filterStep);
|
||||
|
||||
|
@@ -200,7 +200,7 @@ public class CombinedIndexReaderTest {
|
||||
var reader = indexFactory.getCombinedIndexReader();
|
||||
var query = reader.findFullWord(kw("hello"))
|
||||
.also(kw("world"), new IndexSearchBudget(10_000))
|
||||
.not(kw("goodbye"))
|
||||
.not(kw("goodbye"), new IndexSearchBudget(10_000))
|
||||
.build();
|
||||
|
||||
var buffer = new LongQueryBuffer(32);
|
||||
|
@@ -307,6 +307,45 @@ public class SkipListReader {
|
||||
return currentBlockIdx >= n;
|
||||
}
|
||||
|
||||
public boolean tryRejectData(@NotNull LongQueryBuffer data) {
|
||||
try (var page = pool.get(currentBlock)) {
|
||||
|
||||
int n = headerNumRecords(page, currentBlockOffset);
|
||||
int fc = headerForwardCount(page, currentBlockOffset);
|
||||
int flags = headerFlags(page, currentBlockOffset);
|
||||
|
||||
int dataOffset = SkipListConstants.pageDataOffset(currentBlockOffset, fc);
|
||||
if (rejectInPage(page, dataOffset, n, data)) {
|
||||
atEnd = (flags & SkipListConstants.FLAG_END_BLOCK) != 0;
|
||||
if (atEnd) {
|
||||
while (data.hasMore())
|
||||
data.retainAndAdvance();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!data.hasMore()) {
|
||||
currentBlock += SkipListConstants.BLOCK_SIZE;
|
||||
}
|
||||
else {
|
||||
long nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE;
|
||||
long currentValue = data.currentValue();
|
||||
for (int i = 0; i < fc; i++) {
|
||||
long blockMaxValue = page.getLong(currentBlockOffset + SkipListConstants.HEADER_SIZE + 8 * i);
|
||||
nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE * SkipListConstants.skipOffsetForPointer(Math.max(0, i-1));
|
||||
if (blockMaxValue >= currentValue) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
currentBlockOffset = 0;
|
||||
currentBlockIdx = 0;
|
||||
currentBlock = nextBlock;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return data.hasMore();
|
||||
}
|
||||
|
||||
public void rejectData(@NotNull LongQueryBuffer data) {
|
||||
while (data.hasMore()) {
|
||||
try (var page = pool.get(currentBlock)) {
|
||||
|
Reference in New Issue
Block a user