mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
(index) Aggregate termdata reads into a single io_uring operation instead of one for each term
This commit is contained in:
@@ -164,6 +164,33 @@ public class FullReverseIndexReader {
|
||||
return new SkipListReader(dataPool, offset);
|
||||
}
|
||||
|
||||
public TermData[] getTermData(Arena arena,
|
||||
long[] termIds,
|
||||
long[] docIds)
|
||||
{
|
||||
|
||||
long[] offsetsAll = new long[termIds.length * docIds.length];
|
||||
|
||||
for (int i = 0; i < termIds.length; i++) {
|
||||
long termId = termIds[i];
|
||||
long offset = wordOffset(termId);
|
||||
|
||||
if (offset < 0) {
|
||||
// This is likely a bug in the code, but we can't throw an exception here
|
||||
logger.debug("Missing offset for word {}", termId);
|
||||
continue;
|
||||
}
|
||||
|
||||
var reader = getReader(offset);
|
||||
|
||||
// Read the size and offset of the position data
|
||||
var offsetsForTerm = reader.getValueOffsets(docIds);
|
||||
System.arraycopy(offsetsForTerm, 0, offsetsAll, i * docIds.length, docIds.length);
|
||||
}
|
||||
|
||||
return positionsFileReader.getTermData(arena, offsetsAll);
|
||||
}
|
||||
|
||||
public TermData[] getTermData(Arena arena,
|
||||
long termId,
|
||||
long[] docIds)
|
||||
|
@@ -12,6 +12,7 @@ import nu.marginalia.index.forward.ForwardIndexReader;
|
||||
import nu.marginalia.index.forward.spans.DocumentSpans;
|
||||
import nu.marginalia.index.model.QueryParams;
|
||||
import nu.marginalia.index.model.SearchTerms;
|
||||
import nu.marginalia.index.positions.TermData;
|
||||
import nu.marginalia.index.query.IndexQuery;
|
||||
import nu.marginalia.index.query.IndexQueryBuilder;
|
||||
import nu.marginalia.index.query.IndexSearchBudget;
|
||||
@@ -27,6 +28,7 @@ import org.slf4j.LoggerFactory;
|
||||
import java.lang.foreign.Arena;
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
@@ -184,6 +186,18 @@ public class CombinedIndexReader {
|
||||
}
|
||||
|
||||
/** Retrieves the term metadata for the specified word for the provided documents */
|
||||
public TermMetadataList[] getTermMetadata(Arena arena,
|
||||
long[] wordIds,
|
||||
CombinedDocIdList docIds)
|
||||
{
|
||||
TermData[] combinedTermData = reverseIndexFullReader.getTermData(arena, wordIds, docIds.array());
|
||||
TermMetadataList[] ret = new TermMetadataList[wordIds.length];
|
||||
for (int i = 0; i < wordIds.length; i++) {
|
||||
ret[i] = new TermMetadataList(Arrays.copyOfRange(combinedTermData, i*docIds.size(), (i+1)*docIds.size()));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
public TermMetadataList getTermMetadata(Arena arena,
|
||||
long wordId,
|
||||
CombinedDocIdList docIds)
|
||||
|
@@ -92,13 +92,9 @@ public class IndexResultRankingService {
|
||||
|
||||
// Perform expensive I/O operations
|
||||
|
||||
this.termsForDocs = new TermMetadataList[termCount];
|
||||
for (int ti = 0; ti < termCount; ti++) {
|
||||
termsForDocs[ti] = currentIndex.getTermMetadata(arena, searchTerms.termIdsAll.at(ti), resultIds);
|
||||
if (budget != null && !budget.hasTimeLeft())
|
||||
throw new TimeoutException();
|
||||
}
|
||||
|
||||
this.termsForDocs = currentIndex.getTermMetadata(arena, searchTerms.termIdsAll.array, resultIds);
|
||||
if (!budget.hasTimeLeft())
|
||||
throw new TimeoutException();
|
||||
this.documentSpans = currentIndex.getDocumentSpans(arena, resultIds);
|
||||
}
|
||||
|
||||
|
@@ -6,7 +6,7 @@ import java.util.Arrays;
|
||||
import java.util.stream.LongStream;
|
||||
|
||||
public final class TermIdList {
|
||||
private final long[] array;
|
||||
public final long[] array;
|
||||
|
||||
public TermIdList(long[] array) {
|
||||
this.array = array;
|
||||
|
@@ -15,7 +15,7 @@ public class UringFileReader implements AutoCloseable {
|
||||
private final int fd;
|
||||
private final boolean direct;
|
||||
|
||||
private static final int QUEUE_SIZE = 1024;
|
||||
private static final int QUEUE_SIZE = 8192;
|
||||
|
||||
public UringFileReader(Path filename, boolean direct) throws IOException {
|
||||
if (direct) {
|
||||
|
Reference in New Issue
Block a user