1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

(index) Aggregate termdata reads into a single io_uring operation instead of one for each term

This commit is contained in:
Viktor Lofgren
2025-08-09 17:43:18 +02:00
parent 0610cc19ad
commit 1d2ab21e27
5 changed files with 46 additions and 9 deletions

View File

@@ -164,6 +164,33 @@ public class FullReverseIndexReader {
return new SkipListReader(dataPool, offset);
}
/** Looks up term data for every combination of the given terms and documents,
 * handing all value offsets to the positions file reader in a single call
 * instead of issuing one call per term.
 * <p>
 * The offsets are laid out term-major: the entries for {@code termIds[t]} occupy
 * indices {@code [t * docIds.length, (t + 1) * docIds.length)} of the array that is
 * passed on. Terms for which no word offset is found keep zero-valued offsets in
 * their slice.
 * NOTE(review): presumably the positions reader treats a zero offset as
 * "no data" -- confirm against positionsFileReader.getTermData.
 *
 * @param arena   arena forwarded to the positions file reader
 * @param termIds ids of the terms to look up
 * @param docIds  ids of the documents to look up for each term
 * @return whatever the positions file reader returns for the combined offsets
 */
public TermData[] getTermData(Arena arena,
                              long[] termIds,
                              long[] docIds)
{
    final int docCount = docIds.length;
    final long[] combinedOffsets = new long[termIds.length * docCount];

    for (int termIdx = 0; termIdx < termIds.length; termIdx++) {
        final long wordId = termIds[termIdx];
        final long wordOffset = wordOffset(wordId);

        if (wordOffset >= 0) {
            // Read the size and offset of the position data for this term,
            // then place it in the slice reserved for the term
            var termOffsets = getReader(wordOffset).getValueOffsets(docIds);
            System.arraycopy(termOffsets, 0, combinedOffsets, termIdx * docCount, docCount);
        }
        else {
            // This is likely a bug in the code, but we can't throw an exception here;
            // the slice for this term is left zeroed
            logger.debug("Missing offset for word {}", wordId);
        }
    }

    return positionsFileReader.getTermData(arena, combinedOffsets);
}
public TermData[] getTermData(Arena arena,
long termId,
long[] docIds)

View File

@@ -12,6 +12,7 @@ import nu.marginalia.index.forward.ForwardIndexReader;
import nu.marginalia.index.forward.spans.DocumentSpans;
import nu.marginalia.index.model.QueryParams;
import nu.marginalia.index.model.SearchTerms;
import nu.marginalia.index.positions.TermData;
import nu.marginalia.index.query.IndexQuery;
import nu.marginalia.index.query.IndexQueryBuilder;
import nu.marginalia.index.query.IndexSearchBudget;
@@ -27,6 +28,7 @@ import org.slf4j.LoggerFactory;
import java.lang.foreign.Arena;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
@@ -184,6 +186,18 @@ public class CombinedIndexReader {
}
/** Retrieves the term metadata for each of the specified words for the provided documents.
 * <p>
 * All words are resolved through one call to the full reverse index reader; the
 * combined result is then cut into one slice of {@code docIds.size()} entries per
 * word, in the same order as {@code wordIds}.
 *
 * @param arena   arena forwarded to the reverse index reader
 * @param wordIds ids of the words to look up
 * @param docIds  documents to fetch term metadata for
 * @return one {@code TermMetadataList} per entry in {@code wordIds}
 */
public TermMetadataList[] getTermMetadata(Arena arena,
                                          long[] wordIds,
                                          CombinedDocIdList docIds)
{
    final TermData[] allTermData = reverseIndexFullReader.getTermData(arena, wordIds, docIds.array());
    final int docCount = docIds.size();

    final TermMetadataList[] perWord = new TermMetadataList[wordIds.length];

    // Walk the combined array one word-sized window at a time
    int sliceStart = 0;
    for (int wordIdx = 0; wordIdx < perWord.length; wordIdx++) {
        final int sliceEnd = sliceStart + docCount;
        final TermData[] slice = Arrays.copyOfRange(allTermData, sliceStart, sliceEnd);
        perWord[wordIdx] = new TermMetadataList(slice);
        sliceStart = sliceEnd;
    }

    return perWord;
}
public TermMetadataList getTermMetadata(Arena arena,
long wordId,
CombinedDocIdList docIds)

View File

@@ -92,13 +92,9 @@ public class IndexResultRankingService {
// Perform expensive I/O operations
this.termsForDocs = new TermMetadataList[termCount];
for (int ti = 0; ti < termCount; ti++) {
termsForDocs[ti] = currentIndex.getTermMetadata(arena, searchTerms.termIdsAll.at(ti), resultIds);
if (budget != null && !budget.hasTimeLeft())
throw new TimeoutException();
}
this.termsForDocs = currentIndex.getTermMetadata(arena, searchTerms.termIdsAll.array, resultIds);
if (!budget.hasTimeLeft())
throw new TimeoutException();
this.documentSpans = currentIndex.getDocumentSpans(arena, resultIds);
}

View File

@@ -6,7 +6,7 @@ import java.util.Arrays;
import java.util.stream.LongStream;
public final class TermIdList {
private final long[] array;
public final long[] array;
public TermIdList(long[] array) {
this.array = array;

View File

@@ -15,7 +15,7 @@ public class UringFileReader implements AutoCloseable {
private final int fd;
private final boolean direct;
private static final int QUEUE_SIZE = 1024;
private static final int QUEUE_SIZE = 8192;
public UringFileReader(Path filename, boolean direct) throws IOException {
if (direct) {