mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

Compare commits

247 Commits

Author SHA1 Message Date
Viktor Lofgren
edd453531e (index) Partition keyword lexicons by language 2025-09-04 17:24:48 +02:00
Viktor Lofgren
096496ada1 (refac) Fold ft-anchor-keywords into converting-process 2025-09-03 13:04:30 +02:00
Viktor Lofgren
8ca6209260 (refac) Fold ft-anchor-keywords into converting-process 2025-09-03 13:03:38 +02:00
Viktor Lofgren
673c65d3c9 (refac) Fold term-frequency-dict into language-processing 2025-09-03 12:59:10 +02:00
Viktor Lofgren
acb9ec7b15 (refac) Consistently use 'languageIsoCode' for the language field 2025-09-03 12:54:18 +02:00
Viktor Lofgren
47079e05db (index) Store language information in the index journal 2025-09-03 12:33:24 +02:00
Viktor Lofgren
c93056e77f (refac) Clean up index code 2025-09-03 09:51:57 +02:00
Viktor Lofgren
6f7530e807 (refac) Clean up index code 2025-09-02 18:53:58 +02:00
Viktor Lofgren
87ce4a1b52 (refac) Clean up index code 2025-09-02 17:52:38 +02:00
Viktor Lofgren
52194cbe7a (refac) Clean up index code 2025-09-02 17:44:42 +02:00
Viktor Lofgren
fd1ac03c78 (refac) Clean up index code 2025-09-02 17:30:19 +02:00
Viktor Lofgren
5e5b86efb4 (refac) Clean up index code 2025-09-02 17:24:30 +02:00
Viktor Lofgren
f332ec6191 (refac) Clean up index code 2025-09-02 13:13:10 +02:00
Viktor Lofgren
c25c1af437 (refac) Clean up index code 2025-09-02 13:04:05 +02:00
Viktor Lofgren
eb0c911b45 (refac) Clean up index code 2025-09-02 12:50:07 +02:00
Viktor Lofgren
1979870ce4 (refac) Merge index-forward, index-reverse, index/query into index
The project has too many submodules, and it's a bit of a headache to navigate.
2025-09-02 12:30:42 +02:00
Viktor Lofgren
0ba2ea38e1 (index) Move reverse index into a distinct package 2025-09-02 11:59:56 +02:00
Viktor Lofgren
d6cfbceeea (index) Use a configurable hasher in the index 2025-09-01 13:44:28 +02:00
Viktor Lofgren
e369d200cc (refac) Simplify index data model by merging SearchParameters, SearchTerms and ResultRankingContext into a new object called SearchContext
The previous design was difficult to reason about, as similar data was stored in several places and different functions wanted nearly identical (but not quite identical) context objects.

This is in preparation for making the keyword hash function configurable, as we want to focus all the code that hashes keywords into one place.
2025-09-01 13:17:11 +02:00
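
A minimal sketch of what such a consolidation could look like, assuming illustrative field names rather than the actual ones in the repository:

import java.util.List;
import java.util.function.ToLongFunction;

// Hypothetical sketch of a consolidated per-query context object. All keyword
// hashing funnels through one place, which is what makes the hash algorithm
// easy to swap out later.
public record SearchContext(
        long budgetMs,                        // remaining time budget for the query
        List<String> searchTerms,             // query terms to look up
        ToLongFunction<String> keywordHasher  // configurable keyword hash function
) {
    public long hashKeyword(String keyword) {
        return keywordHasher.applyAsLong(keyword);
    }
}
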
Viktor Lofgren
946d64c8da (index) Make hash algorithm selection configurable, writer-side 2025-09-01 12:03:01 +02:00
Viktor Lofgren
42f043a60f (API) Add language parameter to the APIs 2025-09-01 09:33:39 +02:00
Viktor Lofgren
2f3950e0d5 (language) Roll KeywordExtractor into LanguageDefinition 2025-08-29 10:55:48 +02:00
Viktor Lofgren
61d803869e (language) Add support for languages with no POS-tagging
Clean up previous commit a bit.
2025-08-29 10:55:48 +02:00
Viktor Lofgren
df6434d177 (language) Add support for languages with no POS-tagging
This disables a lot of the smart keyword extraction,
which is mostly a crutch for helping English and similar
large languages to find relevant search results.

Smaller languages, where a POS-tag model may not be available, are probably fine with this disabled, as the search engine can likely just rawdog the entire results list.
2025-08-29 10:55:48 +02:00
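
A rough sketch of how a converter might branch on model availability; class and method names here are hypothetical, not the project's actual API:

import java.util.List;
import java.util.stream.IntStream;

// Hypothetical sketch: when a language ships without a POS-tagging model,
// fall back to plain distinct tokens instead of grammar-aware extraction.
class KeywordExtractionSketch {
    interface PosTagger { String[] tag(List<String> tokens); }

    List<String> extractKeywords(List<String> tokens, PosTagger taggerOrNull) {
        if (taggerOrNull == null) {
            // no model available: every distinct token is a candidate keyword
            return tokens.stream().distinct().toList();
        }
        String[] tags = taggerOrNull.tag(tokens);
        // grammar-aware path, heavily simplified: keep nouns only
        return IntStream.range(0, tokens.size())
                .filter(i -> tags[i].startsWith("N"))
                .mapToObj(tokens::get)
                .distinct()
                .toList();
    }
}
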
Viktor Lofgren
59519ed7c4 (language) Adjust languages.xml 2025-08-29 10:55:47 +02:00
Viktor Lofgren
874fc2d250 (language) Remove debug logging junk 2025-08-29 10:55:47 +02:00
Viktor Lofgren
69e8ec0eef (language) Fix subject keywords matcher with better rules and correct logic 2025-08-29 10:55:47 +02:00
Viktor Lofgren
a7eb5f54e6 (language) Clean up PosPattern, add tests 2025-08-29 10:55:47 +02:00
Viktor Lofgren
b29ba3e228 (language) Integrate new configurable POS patterns with keyword matchers 2025-08-29 10:55:47 +02:00
Viktor Lofgren
5fa5029c60 (language) Clean up UI 2025-08-29 10:55:47 +02:00
Viktor Lofgren
4257f60f00 (keywords) Fix logic error causing misidentification of some keywords 2025-08-29 10:55:47 +02:00
Viktor Lofgren
ce221d3a0e (language) Integrate old keyword extraction logic with new test tool 2025-08-29 10:55:47 +02:00
Viktor Lofgren
f0741142a3 (refac) Move keyword extraction into language processing 2025-08-29 10:55:47 +02:00
Viktor Lofgren
0899e4d895 (language) First version of the language processing debug tool 2025-08-29 10:55:47 +02:00
Viktor Lofgren
bbf7c5a1cb (language) Fix RDRPosTagger back to working order and integrate with SentenceExtractor 2025-08-29 10:55:47 +02:00
Viktor Lofgren
686a40e69b (language) Update modelling 2025-08-29 10:55:47 +02:00
Viktor Lofgren
8af254f44f (language) Parse PosPattern tags 2025-08-29 10:55:47 +02:00
Viktor Lofgren
2c21bd9287 (language) Add logging for unknown POS tags in PosPattern 2025-08-29 10:55:47 +02:00
Viktor Lofgren
f9645e2f00 (language) Enhance PosPattern to support wildcard variants in pattern matching 2025-08-29 10:55:47 +02:00
Viktor Lofgren
81e311b558 (language) POS-patterns WIP 2025-08-29 10:55:47 +02:00
Viktor Lofgren
507c09146a (language) Add support for downloadable resources, parsing POS tag configuration tags 2025-08-29 10:55:47 +02:00
Viktor Lofgren
f682425594 (language) Basic test for LanguageConfiguration 2025-08-29 10:55:47 +02:00
Viktor Lofgren
de67006c4f (language) Initial integration of new language configuration utility 2025-08-29 10:55:47 +02:00
Viktor Lofgren
eea32bb7b4 (language) Very basic language.xml loading off classpath 2025-08-29 10:55:47 +02:00
Viktor Lofgren
e976940a4e (config) Move slf4j config files to common:config 2025-08-29 10:55:47 +02:00
Viktor Lofgren
b564b33028 (language) Initial embryo for language configuration 2025-08-29 10:55:47 +02:00
Viktor Lofgren
1cca16a58e (language) Simplify language filters 2025-08-29 10:55:47 +02:00
Viktor Lofgren
70b4ed6d81 (ldb) Pipe language information into LDB database 2025-08-29 10:55:47 +02:00
Viktor Lofgren
45dc6412c1 (converter) Add language column to slop tables 2025-08-29 10:55:47 +02:00
Viktor Lofgren
b3b95edcb5 (converter) Bypass some of the grammar processing in the keyword extraction depending on language selection 2025-08-29 10:55:47 +02:00
Viktor Lofgren
338d300e1a (converter) Clean up spans-handling
This code was unnecessarily difficult to follow with repeated packing and re-packing of the same data.
2025-08-29 10:55:47 +02:00
Viktor Lofgren
fa685bf1f4 (converter) Add Language field to ProcessedDocumentDetails 2025-08-29 10:55:47 +02:00
Viktor Lofgren
d79a3e2b2a (converter) Tag documents by language in the index as a keyword 2025-08-29 10:55:47 +02:00
Viktor Lofgren
854382b2be (language-filter) Experimentally permit Swedish results to pass through the language filter 2025-08-29 10:55:47 +02:00
Viktor Lofgren
8710adbc2a (build) Reduce log noise during tests 2025-08-29 10:55:32 +02:00
Viktor Lofgren
acdf7b4785 (build) Add test-logger plugin to get better feedback during test execution 2025-08-29 10:41:35 +02:00
Viktor Lofgren
b5d27c1406 (search) Improve unicode support in displayTitle and displaySummary 2025-08-23 13:59:41 +02:00
Viktor Lofgren
55eb7dc116 (search) Improve unicode support in displayTitle and displaySummary 2025-08-23 13:57:51 +02:00
Viktor Lofgren
f0e8bc8baf (search) Improve unicode support in displayTitle and displaySummary 2025-08-23 13:56:19 +02:00
Viktor Lofgren
91a6ad2337 (search) Improve unicode support in displayTitle and displaySummary 2025-08-23 13:54:48 +02:00
Viktor Lofgren
9a182b9ddb (search) Use ADVERTISEMENT flag instead of TRACKING_ADVERTISEMENT when choosing to flag a result as having ads 2025-08-21 13:08:25 +02:00
Viktor Lofgren
fefbcf15ce (site) Make discord link point to chat.marginalia.nu and let nginx deal with figuring out which discord link to redirect to 2025-08-21 12:46:37 +02:00
Viktor Lofgren
9a789bf62d (array) Fix broken test 2025-08-18 09:10:58 +02:00
Viktor Lofgren
0525303b68 (index) Add upper limit to span lengths
Apparently outliers exist that are larger than SHORT_MAX.  This is probably not interesting, so we'll truncate at 8192 for now.

Adding a logging statement to get more information about which spans these are, so we can address the root cause down the line.
2025-08-17 08:44:57 +02:00
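
A small sketch of the clamp-and-log behaviour described above; the constant and log wording are illustrative:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Clamp outlier span lengths to an upper bound and log enough context to
// chase down the root cause later.
class SpanLengthClamp {
    private static final Logger logger = LoggerFactory.getLogger(SpanLengthClamp.class);
    private static final int MAX_SPAN_LENGTH = 8192;

    static int clamp(int spanLength, String spanType) {
        if (spanLength > MAX_SPAN_LENGTH) {
            logger.warn("Truncating span of type {} with length {}", spanType, spanLength);
            return MAX_SPAN_LENGTH;
        }
        return spanLength;
    }
}
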
Viktor Lofgren
6953d65de5 (native) Register fixed fds for a nice io_uring speed boost 2025-08-16 13:48:11 +02:00
Viktor Lofgren
a7a18ced2e (native) Register fixed fds for a nice io_uring speed boost 2025-08-16 13:46:39 +02:00
Viktor Lofgren
7c94c941b2 (build) Correct rare scenario where root blocks could be generated with a negative size 2025-08-16 11:27:36 +02:00
Viktor Lofgren
ea99b62356 (build) Fix missing junit engine version 2025-08-16 11:01:32 +02:00
Viktor Lofgren
3dc21d34d8 (skiplist) Fix stability of getData fuzz test 2025-08-15 09:17:48 +02:00
Viktor Lofgren
51912e0176 (index) Tweak default values for IndexQueryExecution 2025-08-15 08:07:00 +02:00
Viktor Lofgren
de1b4d5372 (index) Make metrics make more sense by normalizing them by query budget 2025-08-15 03:16:22 +02:00
Viktor Lofgren
50ac926060 (index) Make metrics make more sense by normalizing them by query budget 2025-08-15 03:11:57 +02:00
Viktor Lofgren
d711ee75b5 (index) Add performance metrics 2025-08-15 00:48:52 +02:00
Viktor Lofgren
291ff0c4de (deps) Upgrade crawler commons to fix robots.txt-parser bug 2025-08-15 00:13:15 +02:00
Viktor
2fd2710355 Merge pull request #218 from MarginaliaSearch/o_direct_index
Replace document index btrees with a block-based skiplist, get rid of mmap and use O_DIRECT pread instead, use io_uring for positions reads
2025-08-14 23:57:09 +02:00
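
The readers in this pull request go through native io_uring helpers, but the O_DIRECT side can be approximated in plain Java; a sketch assuming a 4096-byte block size, not the repository's actual reader:

import com.sun.nio.file.ExtendedOpenOption;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

// O_DIRECT requires block-aligned buffers, offsets and lengths, which is why
// the on-disk data wants to be block aligned as well.
class DirectReadSketch {
    private static final int BLOCK_SIZE = 4096;

    static ByteBuffer readBlock(Path file, long blockIndex) throws IOException {
        try (FileChannel ch = FileChannel.open(file,
                StandardOpenOption.READ, ExtendedOpenOption.DIRECT)) {
            // over-allocate, then take a slice aligned to the block size
            ByteBuffer buf = ByteBuffer.allocateDirect(2 * BLOCK_SIZE)
                                       .alignedSlice(BLOCK_SIZE);
            ch.read(buf, blockIndex * (long) BLOCK_SIZE);
            return buf.flip();
        }
    }
}
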
Viktor Lofgren
e3b957063d (native) Add fallbacks and configuration options for building on systems lacking liburing 2025-08-14 23:36:13 +02:00
Viktor Lofgren
aee262e5f6 (index) Safeguard against arena-leaks during exceptions
The GC would catch these eventually, but it's nice to clean up after ourselves in a timely manner.
2025-08-14 19:28:31 +02:00
Viktor Lofgren
4a98a3c711 (skiplist) Move to a separate directory instead of in the btree module 2025-08-14 01:09:46 +02:00
Viktor Lofgren
68f52ca350 (test) Fix tests that works on my machine (TM) 2025-08-14 00:59:58 +02:00
Viktor Lofgren
2a2d951c2f (index) Fix unhinged default values for index.preparationThreads 2025-08-14 00:54:35 +02:00
Viktor Lofgren
379a1be074 (index) Add better timeout handling in UringQueue, fix slow memory leak on timeout exception 2025-08-14 00:52:50 +02:00
Viktor Lofgren
827aadafcd (uring) Reintroduce auto-slicing of excessively long read batches 2025-08-13 14:33:35 +02:00
Viktor Lofgren
aa7679d6ce (pool) Fix bug in exceptionally rare edge case leading to incorrect reads 2025-08-13 14:28:50 +02:00
Viktor Lofgren
6fe6de766d (pool) Fix SegmentMemoryPage storage 2025-08-13 13:17:14 +02:00
Viktor Lofgren
4245ac4c07 (doc) Update docs to reflect that we now need io_uring 2025-08-12 15:12:54 +02:00
Viktor Lofgren
1c49a0f5ad (index) Add system properties for toggling O_DIRECT mode for positions and spans 2025-08-12 15:11:13 +02:00
Viktor Lofgren
9a6e5f646d (docker) Add security_opt: seccomp:unconfined to docker-compose files
This is needed to access io_uring via docker.
2025-08-12 15:10:26 +02:00
Viktor Lofgren
fa92994a31 (uring) Fall back to simple I/O planning behavior when buffered mode is selected in UringFileReader 2025-08-11 23:44:38 +02:00
Viktor Lofgren
bc49406881 (build) Compatibility hack debian server 2025-08-11 23:26:53 +02:00
Viktor Lofgren
90325be447 (minor) Fix comments 2025-08-11 23:19:53 +02:00
Viktor Lofgren
dc89587af3 (index) Improve disk locality of the positions data 2025-08-11 21:17:12 +02:00
Viktor Lofgren
7b552afd6b (index) Improve disk locality of the positions data 2025-08-11 20:59:11 +02:00
Viktor Lofgren
73557edc67 (index) Improve disk locality of the positions data 2025-08-11 20:57:32 +02:00
Viktor Lofgren
83919e448a (index) Use O_DIRECT buffered reads for spans 2025-08-11 18:04:25 +02:00
Viktor Lofgren
6f5b75b84d (cleanup) Remove accidentally committed print stmt 2025-08-11 18:04:25 +02:00
Viktor Lofgren
db315e2813 (index) Use O_DIRECT position reads 2025-08-11 18:04:25 +02:00
Viktor Lofgren
e9977e08b7 (index) Block-align positions data
This will make reads more efficient, and possibly pave way for O_DIRECT reads of this data
2025-08-11 14:36:45 +02:00
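
A sketch of the alignment idea, assuming a 4096-byte block size and an illustrative padding strategy:

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

// Pad the file out to the next block boundary after each record so that an
// O_DIRECT reader never has to fetch a block that only contains the tail of
// an unrelated record.
class BlockAlignedWriter {
    private static final int BLOCK_SIZE = 4096;

    static void writeAligned(FileChannel out, ByteBuffer record) throws IOException {
        out.write(record);
        int slack = (int) (out.position() % BLOCK_SIZE);
        if (slack != 0) {
            out.write(ByteBuffer.allocate(BLOCK_SIZE - slack)); // zero-fill padding
        }
    }
}
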
Viktor Lofgren
1df3757e5f (native) Clean up io_uring code and check in execution queue, currently unused but nifty 2025-08-11 13:54:05 +02:00
Viktor Lofgren
ca283f9684 (native) Clean up native helpers and break them into their own library 2025-08-10 20:55:34 +02:00
Viktor Lofgren
85360e61b2 (index) Grow span writer buffer size
Apparently outlier spans can grow considerably large.
2025-08-10 17:20:38 +02:00
Viktor Lofgren
e2ccff21bc (index) Wait until ranking is finished in query execution 2025-08-09 23:40:30 +02:00
Viktor Lofgren
c5b5b0c699 (index) Permit fast termination of rejection filter execution 2025-08-09 23:36:59 +02:00
Viktor Lofgren
9a65946e22 (uring) Reduce queue size to 2048 to avoid ENOMEM on systems with default ulimits 2025-08-09 20:41:24 +02:00
Viktor Lofgren
1d2ab21e27 (index) Aggregate termdata reads into a single io_uring operation instead of one for each term 2025-08-09 17:43:18 +02:00
Viktor Lofgren
0610cc19ad (index) Fix double close errors 2025-08-09 17:05:38 +02:00
Viktor Lofgren
a676306a7f (skiplist) Fix bugs in seek operations 2025-08-09 17:00:27 +02:00
Viktor Lofgren
8d68cd14fb (skiplist) Even more aggressive forward pointers 2025-08-09 16:11:41 +02:00
Viktor Lofgren
4773c5a52b (index) Backport some changes made during performance evaluations 2025-08-09 15:19:41 +02:00
Viktor Lofgren
74bd562ae4 (index) Move I/O to separate threads to hopefully reduce contention a bit 2025-08-09 15:19:41 +02:00
Viktor Lofgren
c9751287b0 (index) Boost the buffer size used in PrioIndexEntrySource 2025-08-09 01:46:12 +02:00
Viktor Lofgren
5da24e3fc4 (index) Segregate full and priority query ranking 2025-08-09 00:39:31 +02:00
Viktor Lofgren
20a4e86eec (index) Use a confined arena in IndexResultRankingService 2025-08-08 22:08:35 +02:00
Viktor Lofgren
477a184948 (experiment) Allow early termination of include conditions in lookups 2025-08-08 19:12:54 +02:00
Viktor Lofgren
8940ce99db (perf) More statistics in perf test 2025-08-08 18:57:25 +02:00
Viktor Lofgren
0ac0fa4dca (perf) More statistics in perf test 2025-08-08 18:56:17 +02:00
Viktor Lofgren
942f15ef14 (skiplist) Use a linear-quadratic forward pointer scheme instead of an exponential 2025-08-08 16:57:15 +02:00
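
An illustrative comparison of the two spacing schemes, with made-up constants:

// In a skip list, the k-th forward pointer of a block skips some number of
// blocks ahead. An exponential scheme doubles the stride at every level,
// while a linear-quadratic scheme grows more gently and keeps nearby skips
// denser.
class ForwardPointerSpacing {
    static long exponentialStride(int k) {
        return 1L << k;                   // 1, 2, 4, 8, 16, ...
    }

    static long linearQuadraticStride(int k, long a, long b) {
        return a * k + b * (long) k * k;  // e.g. a=1, b=1: 0, 2, 6, 12, 20, ...
    }
}
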
Viktor Lofgren
f668f33d5b (index) Tweaks and optimizations 2025-08-08 15:32:23 +02:00
Viktor Lofgren
6789975cd2 (index) Tweaks and optimizations 2025-08-08 15:30:48 +02:00
Viktor Lofgren
c3ba608776 (index) Split up evaluation tasks 2025-08-08 15:20:33 +02:00
Viktor Lofgren
733d2687fe (skiplist) Roll back the design change that segregated the values associated with documents into a separate file 2025-08-08 14:45:11 +02:00
Viktor Lofgren
f6daac8ed0 (index) MADVISE_RANDOM the index btrees 2025-08-07 21:14:28 +02:00
Viktor Lofgren
c2eeee4a06 (uring) Disable result set combination 2025-08-07 21:13:30 +02:00
Viktor Lofgren
3b0c701df4 (uring) Update uring timeout threshold 2025-08-07 20:13:25 +02:00
Viktor Lofgren
c6fb2db43b (index) Use a more SLA-aware execution scheduler 2025-08-07 20:13:15 +02:00
Viktor Lofgren
9bc8fe05ae (skiplist) Clean up search logic 2025-08-07 19:35:25 +02:00
Viktor Lofgren
440ffcf6f8 (skiplist) Fix bug in intersection-like algorithms 2025-08-07 02:18:14 +02:00
Viktor Lofgren
b07709cc72 (native) Disable expensive debug checks from uring code 2025-08-06 21:05:28 +02:00
Viktor Lofgren
9a6acdcbe0 (skiplist) Tag slow fuzz test as "slow" 2025-08-06 20:59:52 +02:00
Viktor Lofgren
23b9b0bf1b (index) Parametrize skip list block size and buffer pool sizes 2025-08-06 20:59:33 +02:00
Viktor Lofgren
749c8ed954 (pool) Correct buffer pool alignment 2025-08-06 20:56:34 +02:00
Viktor Lofgren
9f4b6939ca (skiplist) Fix condition for truncated block writing 2025-08-06 16:25:53 +02:00
Viktor Lofgren
1d08e44e8d (uring) Fadvise random access for uring buffered reads 2025-08-06 15:54:24 +02:00
Viktor Lofgren
fc2e156e78 (skiplist) Ensure docs file is a multiple BLOCK_SIZE bytes 2025-08-06 15:13:32 +02:00
Viktor Lofgren
5e68a89e9f (index) Improve error handling 2025-08-06 15:05:16 +02:00
Viktor Lofgren
d380661307 (index) Improve error handling 2025-08-06 14:31:06 +02:00
Viktor Lofgren
cccdf5c329 (pool) Check interrupt status in PoolLru's reclamation thread 2025-08-06 13:26:00 +02:00
Viktor Lofgren
f085b4ea12 (skiplist) Fix tests 2025-08-06 13:24:14 +02:00
Viktor Lofgren
e208f7d3ba (skiplist) Code cleanup and added validation 2025-08-06 12:55:04 +02:00
Viktor Lofgren
b577085cb2 (pool) Use one contiguous memory allocation to encourage a HugePage allocation and reduce TLB thrashing 2025-08-06 12:49:46 +02:00
Viktor Lofgren
b9240476f6 (pool) Use one contiguous memory allocation to encourage a HugePage allocation and reduce TLB thrashing 2025-08-06 12:48:14 +02:00
Viktor Lofgren
8f50f86d0b (index) Fix error handling 2025-08-05 22:19:23 +02:00
Viktor Lofgren
e3b7ead7a9 (skiplist) Fix aggressive forward pointering 2025-08-05 20:47:38 +02:00
Viktor Lofgren
9a845ba604 (skiplist) EXPERIMENTAL - Store data in a separate file from document ids 2025-08-05 19:10:58 +02:00
Viktor Lofgren
b9381f1603 (skiplist) EXPERIMENTAL - Store data in a separate file from document ids 2025-08-05 17:35:13 +02:00
Viktor Lofgren
6a60127267 (skiplist) EXPERIMENTAL - Store data in a separate file from document ids 2025-08-05 16:54:39 +02:00
Viktor Lofgren
e8ffcfbb19 (skiplist) Correct binary search implementation, fix intersection logic 2025-08-04 14:49:09 +02:00
Viktor Lofgren
caf0850f81 (index) Clean up code 2025-08-04 00:12:35 +02:00
Viktor Lofgren
62e3bb675e (btree) Remove O_DIRECT btree implementation 2025-08-03 23:43:31 +02:00
Viktor Lofgren
4dc3e7da7a (perf) Remove warmup from perf test, it's not doing much 2025-08-03 21:19:54 +02:00
Viktor Lofgren
92b09883ec (index) Switch from AIO to io_uring
Turns out AIO is just bad, especially with buffered I/O; io_uring performs strictly better in this scenario.
2025-08-03 21:19:54 +02:00
Viktor Lofgren
87082b4ef8 (index) Use AIO for reading spans and positions
This performs slightly worse in benchmarks, but that's likely caused by hitting the page cache.

AIO will tend to perform better when we see cache misses, which is the expected case in production on real-world data.
2025-08-03 21:19:54 +02:00
Viktor Lofgren
84d3f6087f (skiplist) Parametrize skip list block size, increase to 4K pages 2025-08-03 21:19:54 +02:00
Viktor Lofgren
f93ba371a5 (pool) Fix the LRU to not deadlock and be shit 2025-08-03 21:19:54 +02:00
Viktor Lofgren
5eec27c68d (pool) Fix for 32 bit rollover in clockHand for LRU 2025-08-03 21:19:54 +02:00
Viktor Lofgren
ab01576f91 (pool) Use one global buffer pool instead of many small ones, improved LRU with gclock reclamation, skip list optimization 2025-08-03 21:19:54 +02:00
Viktor Lofgren
054e5ccf44 (pool) Testing synchronized to see if I can find the deadlock 2025-08-03 21:19:54 +02:00
Viktor Lofgren
4351ea5128 (pool) Fix buffer leak 2025-08-03 21:19:54 +02:00
Viktor Lofgren
49cfa3a5e9 (pool) Decrease LQB size 2025-08-03 21:19:54 +02:00
Viktor Lofgren
683854b23f (pool) Fix logging 2025-08-03 21:19:54 +02:00
Viktor Lofgren
e880fa8945 (pool) Simplify locking in PoolLru 2025-08-03 21:19:54 +02:00
Viktor Lofgren
2482dc572e (pool) Grow free queue size 2025-08-03 21:19:54 +02:00
Viktor Lofgren
4589f11898 (pool) More stats 2025-08-03 21:19:54 +02:00
Viktor Lofgren
e43b6e610b (pool) Adjust pool reclamation strategy 2025-08-03 21:19:53 +02:00
Viktor Lofgren
4772117a1f (skiplist) First stab at a skiplist replacement for btrees in the documents lists 2025-08-03 21:19:53 +02:00
Viktor Lofgren
3fc7ea521c (pool) Remove readahead and simplify the code 2025-08-03 21:19:53 +02:00
Viktor Lofgren
4372f5af03 (pool) More performant LRU pool + better instructions queue 2025-08-03 21:19:53 +02:00
Viktor Lofgren
4ad89b6c75 (pool) More performant LRU pool 2025-08-03 21:19:53 +02:00
Viktor Lofgren
ad0519e031 (index) Optimizations 2025-08-03 21:19:53 +02:00
Viktor Lofgren
596ece1230 (pool) Fix deadlock during pool starvation 2025-08-03 21:19:53 +02:00
Viktor Lofgren
07b6e1585b (pool) Bump pool sizes 2025-08-03 21:19:53 +02:00
Viktor Lofgren
cb5e2778eb (pool) Align the buffers with 512b 2025-08-03 21:19:53 +02:00
Viktor Lofgren
8f5ea7896c (btree) More debug information on numEntries = 0 scenario 2025-08-03 21:19:53 +02:00
Viktor Lofgren
76c398e0b1 (index) Fix lingering issues with previous optimizations 2025-08-03 21:19:53 +02:00
Viktor Lofgren
4a94f04a8d (btree) Debug logging 2025-08-03 21:19:53 +02:00
Viktor Lofgren
df72f670d4 (btree) Fix queryData 2025-08-03 21:19:53 +02:00
Viktor Lofgren
eaa22c2f5a (*) Logging 2025-08-03 21:19:53 +02:00
Viktor Lofgren
7be173aeca (pool) Only dump statistics if they say anything 2025-08-03 21:19:53 +02:00
Viktor Lofgren
36685bdca7 (btree) Fix retain implementation 2025-08-03 21:19:53 +02:00
Viktor Lofgren
ad04057609 (btree) Add short circuits when retain/rejecting on an empty tree 2025-08-03 21:19:53 +02:00
Viktor Lofgren
eb76ae22e2 (perf) Use lqb size 512 in perf test 2025-08-03 21:19:53 +02:00
Viktor Lofgren
4b858ab341 (btree) Cache retain/reject reads 2025-08-03 21:19:53 +02:00
Viktor Lofgren
c6e3c8aa3b (index) Focus pools to try to increase reuse 2025-08-03 21:19:53 +02:00
Viktor Lofgren
9128d3907c (index) Periodically dump buffer metrics 2025-08-03 21:19:53 +02:00
Viktor Lofgren
4ef16d13d4 (index) O_DIRECT based buffer pool for index reads 2025-07-30 15:04:23 +02:00
Viktor Lofgren
838a5626ec (index) Reduce query buffer size 2025-07-27 21:42:04 +02:00
Viktor Lofgren
6b426209c7 (index) Restore threshold for work stealing in query execution 2025-07-27 21:41:46 +02:00
Viktor Lofgren
452b5731d9 (index) Lower threshold for work stealing in query execution 2025-07-27 21:35:11 +02:00
Viktor Lofgren
c91cf49630 (search) Disable scribe.rip substitution
It does not appear to work well
2025-07-27 19:40:58 +02:00
Viktor Lofgren
8503030f18 (search) Fix rare exception in scribe.rip substitution 2025-07-27 19:38:52 +02:00
Viktor Lofgren
744f7d3ef7 (search) Fix rare exception in scribe.rip substitution 2025-07-27 19:34:03 +02:00
Viktor Lofgren
215e12afe9 (index) Shrink query buffer size 2025-07-27 17:33:46 +02:00
Viktor Lofgren
2716bce918 (index) Adjust timeout logic for evaluation 2025-07-27 17:28:34 +02:00
Viktor Lofgren
caf2e6fbb7 (index) Adjust timeout logic for evaluation 2025-07-27 17:27:07 +02:00
Viktor Lofgren
233f0acfb1 (index) Further reduce query buffer size 2025-07-27 17:13:08 +02:00
Viktor Lofgren
e3a4ff02e9 (index) Abandon ongoing evaluation tasks if time is up 2025-07-27 17:04:01 +02:00
Viktor Lofgren
c786283ae1 (index) Reduce query buffer size 2025-07-27 16:57:55 +02:00
Viktor Lofgren
a3f65ac0e0 (deploy) Trigger index deployment 2025-07-27 16:50:23 +02:00
Viktor
aba1a32af0 Merge pull request #217 from MarginaliaSearch/uncompressed-spans-file
Index optimizations
2025-07-27 16:49:27 +02:00
Viktor Lofgren
c9c442345b (perf) Change execution test to use processing rate instead of count 2025-07-27 16:39:51 +02:00
Viktor Lofgren
2e126ba30e (perf) Change execution test to use processing rate instead of count 2025-07-27 16:37:20 +02:00
Viktor Lofgren
2087985f49 (index) Implement work stealing in IndexQueryExecution as a better approach to backpressure 2025-07-27 16:29:57 +02:00
Viktor Lofgren
2b13ebd18b (index) Tweak evaluation backlog handling 2025-07-27 16:08:16 +02:00
Viktor Lofgren
6d92c125fe (perf) Fix perf test 2025-07-27 15:50:28 +02:00
Viktor Lofgren
f638cfa39a (index) Avoid possibility of negative timeout 2025-07-27 15:39:12 +02:00
Viktor Lofgren
89447c12af (index) Avoid possibility of negative timeout 2025-07-27 15:24:47 +02:00
Viktor Lofgren
c71fc46f04 (perf) Update perf test with execution scenario 2025-07-27 15:22:07 +02:00
Viktor Lofgren
f96874d828 (sequence) Implement a largestValue abort condition for minDistance()
This is something like 3500% faster in certain common scenarios
2025-07-27 15:05:50 +02:00
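
A sketch of what such an abort condition can look like for a minimum-distance scan over two sorted position lists; this illustrates the idea rather than the repository's actual minDistance():

// Minimum absolute distance between any element of a and any element of b.
// Both arrays are assumed sorted ascending and non-empty.
class MinDistanceSketch {
    static int minDistance(int[] a, int[] b) {
        int best = Integer.MAX_VALUE;
        int largestB = b[b.length - 1];
        int i = 0, j = 0;
        while (i < a.length && j < b.length) {
            if (a[i] > largestB) {
                // abort: every remaining pair is at least this far apart
                best = Math.min(best, a[i] - largestB);
                break;
            }
            best = Math.min(best, Math.abs(a[i] - b[j]));
            if (a[i] < b[j]) i++; else j++;
        }
        return best;
    }
}
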
Viktor Lofgren
583a84d5a0 (index) Clean up of the index query execution logic 2025-07-27 15:05:50 +02:00
Viktor Lofgren
f65b946448 (index) Clean up code 2025-07-27 15:05:50 +02:00
Viktor Lofgren
3682815855 (index) Optimize sequence intersection for the n=1 case 2025-07-26 19:14:32 +02:00
Viktor Lofgren
3a94357660 (index) Perf test tool (WIP!) 2025-07-26 11:49:33 +02:00
Viktor Lofgren
673b0d3de1 (index) Perf test tool (WIP!) 2025-07-26 11:49:31 +02:00
Viktor Lofgren
ea942bc664 (spans) Add signature to the footer of the spans file, including a version byte so we can detect whether to use the old or new decoding logic 2025-07-25 12:07:18 +02:00
Viktor Lofgren
7ed5083c54 (index) Don't split results into chunks 2025-07-25 11:45:07 +02:00
Viktor Lofgren
08bb2c097b (refac) Clean up the data model used in the index service 2025-07-25 10:54:07 +02:00
Viktor Lofgren
495fb325be (sequence) Correct sequence intersection bug introduced in optimizations 2025-07-25 10:48:33 +02:00
Viktor Lofgren
05c25bbaec (chore) Clean up 2025-07-24 23:43:27 +02:00
Viktor Lofgren
2a028b84f3 (chore) Clean up 2025-07-24 20:12:56 +02:00
Viktor Lofgren
a091a23623 (ranking) Remove unnecessary metadata retrievals 2025-07-24 20:08:09 +02:00
Viktor Lofgren
e8897acb45 (ranking) Remove unnecessary metadata retrievals 2025-07-24 20:05:39 +02:00
Viktor Lofgren
b89ffcf2be (index) Evaluate hash based idx mapping in ForwardIndexReader 2025-07-24 19:47:27 +02:00
Viktor Lofgren
dbcc9055b0 (index) Evaluate using MinMaxPriorityQueue as guts of ResultPriorityQueue 2025-07-24 19:31:51 +02:00
Viktor Lofgren
d9740557f4 (sequence) Optimize intersection logic with a fast abort condition 2025-07-24 19:04:10 +02:00
Viktor Lofgren
0d6cd015fd (index) Evaluate reading all spans at once 2025-07-24 18:34:11 +02:00
Viktor Lofgren
c6034efcc8 (index) Cache value of bitset cardinality for speed 2025-07-24 17:24:55 +02:00
Viktor Lofgren
76068014ad (index) More spans optimizations 2025-07-24 15:03:43 +02:00
Viktor Lofgren
1c3ed67127 (index) Byte align document spans 2025-07-24 14:06:14 +02:00
Viktor Lofgren
fc0cb6bd9a (index) Reserve a larger size for IntArrayList in SeqenceOperations.findIntersections 2025-07-24 14:03:44 +02:00
Viktor Lofgren
c2601bac78 (converter) Remove unnecessary allocation of a 16 KB byte buffer 2025-07-24 13:25:37 +02:00
Viktor Lofgren
f5641b72e9 (index) Fix broken test 2025-07-24 13:21:05 +02:00
Viktor Lofgren
36efe2e219 (index) Optimize PositionsFileReader for concurrent reads
In benchmarks this is roughly twice as fast as the previous approach. The main caveat is that we need multiple file descriptors to avoid read instruction serialization by the kernel, which is undesirable since the reads are completely scattershot and can't be reordered by the kernel in a way that optimizes anything.
2025-07-24 13:20:54 +02:00
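
A sketch of the multiple-descriptor approach, spreading positional reads round-robin over several channels opened against the same file; the channel count and helper names are illustrative:

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.concurrent.atomic.AtomicInteger;

// Positional read() does not touch the channel's own position, so the same
// channel can be shared between threads; using several channels avoids the
// kernel serializing reads on a single descriptor.
class MultiChannelPositionsReader implements AutoCloseable {
    private final FileChannel[] channels;
    private final AtomicInteger next = new AtomicInteger();

    MultiChannelPositionsReader(Path file, int nChannels) throws IOException {
        channels = new FileChannel[nChannels];
        for (int i = 0; i < nChannels; i++) {
            channels[i] = FileChannel.open(file, StandardOpenOption.READ);
        }
    }

    byte[] read(long offset, int length) throws IOException {
        FileChannel ch = channels[Math.floorMod(next.getAndIncrement(), channels.length)];
        ByteBuffer buf = ByteBuffer.allocate(length);
        while (buf.hasRemaining()) {
            if (ch.read(buf, offset + buf.position()) < 0) break; // EOF
        }
        return buf.array();
    }

    @Override
    public void close() throws IOException {
        for (FileChannel ch : channels) ch.close();
    }
}
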
Viktor Lofgren
983fe3829e (spans) Evaluate uncompressed spans files
Span decompression appears to be somewhat of a performance bottleneck.  This change removes compression of the spans file.  The spans are still compressed in transit between the converter and index constructor at this stage.  The change is intentionally kept small to just evaluate the performance implications, change in file sizes, etc.
2025-07-23 18:10:41 +02:00
Viktor Lofgren
668c87aa86 (ssr) Drop Executor from SSR as it no longer exists 2025-07-23 13:55:41 +02:00
Viktor Lofgren
9d3f9adb05 Force redeploy of everything 2025-07-23 13:36:02 +02:00
Viktor
a43a1773f1 Merge pull request #216 from MarginaliaSearch/deprecate-executor
Architecture: Remove the separate executor service and roll it into the index service.
2025-07-23 13:32:42 +02:00
Viktor Lofgren
1e7a3a3c4f (docs) Update docs to reflect the change 2025-07-23 13:18:23 +02:00
Viktor Lofgren
62b696b1c3 (architecture) Remove the separate executor service and merge it into the index service
The primary motivation for this is that in production, the large number of partitioned services has led to intermittent exhaustion of available database connections, as each service has its own connection pool.

The decision to have a separate executor service dates back to when the index service was very slow to start and the executor didn't always spin off its memory-hungry tasks into separate processes, which meant the executor would sometimes OOM and crash; it was undesirable to bring the index down with it.
2025-07-23 12:57:13 +02:00
Viktor Lofgren
f1a900f383 (search) Clean up front page mobile design a bit 2025-07-23 12:20:40 +02:00
Viktor Lofgren
700364b86d (sample) Remove debug logging
The problem sat in the desk chair all along
2025-07-21 15:08:20 +02:00
Viktor Lofgren
7e725ddaed (sample) Remove debug logging
The problem sat in the desk chair all along
2025-07-21 14:41:59 +02:00
Viktor Lofgren
120209e138 (sample) Diagnosing compression errors 2025-07-21 14:34:08 +02:00
Viktor Lofgren
a771a5b6ce (sample) Test different approach to decoding 2025-07-21 14:19:01 +02:00
Viktor Lofgren
dac5b54128 (sample) Better logging for sample errors 2025-07-21 14:03:58 +02:00
Viktor Lofgren
6cfb143c15 (sample) Compress sample HTML data and introduce new API for only getting requests 2025-07-21 13:55:25 +02:00
Viktor Lofgren
23c818281b (converter) Reduce DomSample logging for NOT_FOUND 2025-07-21 13:37:55 +02:00
Viktor Lofgren
8aad253cf6 (converter) Add more logging around dom sample data retrieval errors 2025-07-21 13:26:38 +02:00
Viktor Lofgren
556d7af9dc Reapply "(grpc) Use grpc-netty instead of grpc-netty-shaded"
This reverts commit b7a5219ed3.
2025-07-21 13:23:32 +02:00
397 changed files with 10552 additions and 8038 deletions

View File

@@ -6,6 +6,7 @@ plugins {
// This is a workaround for a bug in the Jib plugin that causes it to stall randomly
// https://github.com/GoogleContainerTools/jib/issues/3347
id 'com.google.cloud.tools.jib' version '3.4.5' apply(false)
id 'com.adarshr.test-logger' version '4.0.0'
}
group 'marginalia'
@@ -31,7 +32,10 @@ subprojects.forEach {it ->
jvmArgs += ['--enable-preview']
}
it.tasks.withType(Test).configureEach {
jvmArgs += ['--enable-preview']
jvmArgs += ['--enable-preview',
'--enable-native-access=ALL-UNNAMED',
'--sun-misc-unsafe-memory-access=allow',
'-Dsystem.uringQueueCount=1']
}
// Enable reproducible builds for the entire project

View File

@@ -6,7 +6,6 @@ import com.google.inject.name.Named;
import gnu.trove.list.TLongList;
import nu.marginalia.linkdb.model.DocdbUrlDetail;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.id.UrlIdCodec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -14,7 +13,6 @@ import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.sql.Connection;
import java.sql.DriverManager;
@@ -104,7 +102,7 @@ public class DocumentDbReader {
}
try (var stmt = connection.prepareStatement("""
SELECT ID, URL, TITLE, DESCRIPTION, WORDS_TOTAL, FORMAT, FEATURES, DATA_HASH, QUALITY, PUB_YEAR
SELECT ID, URL, TITLE, DESCRIPTION, LANGUAGE, WORDS_TOTAL, FORMAT, FEATURES, DATA_HASH, QUALITY, PUB_YEAR
FROM DOCUMENT WHERE ID = ?
""")) {
for (int i = 0; i < ids.size(); i++) {
@@ -118,6 +116,7 @@ public class DocumentDbReader {
url,
rs.getString("TITLE"),
rs.getString("DESCRIPTION"),
rs.getString("LANGUAGE"),
rs.getDouble("QUALITY"),
rs.getString("FORMAT"),
rs.getInt("FEATURES"),

View File

@@ -41,8 +41,8 @@ public class DocumentDbWriter {
public void add(List<DocdbUrlDetail> docdbUrlDetail) throws SQLException {
try (var stmt = connection.prepareStatement("""
INSERT OR IGNORE INTO DOCUMENT(ID, URL, TITLE, DESCRIPTION, WORDS_TOTAL, FORMAT, FEATURES, DATA_HASH, QUALITY, PUB_YEAR)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
INSERT OR IGNORE INTO DOCUMENT(ID, URL, TITLE, DESCRIPTION, LANGUAGE, WORDS_TOTAL, FORMAT, FEATURES, DATA_HASH, QUALITY, PUB_YEAR)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""")) {
int i = 0;
@@ -54,15 +54,16 @@ public class DocumentDbWriter {
stmt.setString(3, document.title());
stmt.setString(4, document.description());
stmt.setInt(5, document.wordsTotal());
stmt.setString(6, document.format());
stmt.setInt(7, document.features());
stmt.setLong(8, document.dataHash());
stmt.setDouble(9, document.urlQuality());
stmt.setString(5, document.language());
stmt.setInt(6, document.wordsTotal());
stmt.setString(7, document.format());
stmt.setInt(8, document.features());
stmt.setLong(9, document.dataHash());
stmt.setDouble(10, document.urlQuality());
if (document.pubYear() == null) {
stmt.setInt(10, 0);
stmt.setInt(11, 0);
} else {
stmt.setInt(10, document.pubYear());
stmt.setInt(11, document.pubYear());
}
stmt.addBatch();

View File

@@ -6,6 +6,7 @@ public record DocdbUrlDetail(long urlId,
EdgeUrl url,
String title,
String description,
String language,
double urlQuality,
String format,
int features,

View File

@@ -6,6 +6,7 @@ CREATE TABLE DOCUMENT (
STATE INT,
TITLE TEXT NOT NULL,
DESCRIPTION TEXT NOT NULL,
LANGUAGE TEXT NOT NULL,
WORDS_TOTAL INTEGER NOT NULL,
FORMAT TEXT NOT NULL,

View File

@@ -23,6 +23,7 @@ public class DocumentDbWriterTest {
new nu.marginalia.model.EdgeUrl("http", new EdgeDomain("example.com"), null, "/", null),
"Test",
"This is a test",
"en",
-4.,
"XHTML",
5,

View File

@@ -105,8 +105,6 @@ public enum HtmlFeature {
}
public int getFeatureBit() {
if (getClass().desiredAssertionStatus() && ordinal() >= 32)
throw new IllegalStateException("Attempting to extract feature bit of " + name() + ", with ordinal " + ordinal());
return (1<< ordinal());
}
}

View File

@@ -7,7 +7,6 @@ public enum ServiceId {
Search("search-service"),
Index("index-service"),
Query("query-service"),
Executor("executor-service"),
Control("control-service"),

View File

@@ -1,9 +1,9 @@
package nu.marginalia.service.server;
import io.grpc.Server;
import io.grpc.netty.shaded.io.grpc.netty.NettyServerBuilder;
import io.grpc.netty.shaded.io.netty.channel.nio.NioEventLoopGroup;
import io.grpc.netty.shaded.io.netty.channel.socket.nio.NioServerSocketChannel;
import io.grpc.netty.NettyServerBuilder;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.nio.NioServerSocketChannel;
import nu.marginalia.service.discovery.ServiceRegistryIf;
import nu.marginalia.service.discovery.property.ServiceKey;
import nu.marginalia.service.discovery.property.ServicePartition;

View File

@@ -189,7 +189,7 @@ public class ExecutorClient {
String uriPath = "/transfer/file/" + fileStorage.id();
String uriQuery = "path=" + URLEncoder.encode(path, StandardCharsets.UTF_8);
var endpoints = registry.getEndpoints(ServiceKey.forRest(ServiceId.Executor, fileStorage.node()));
var endpoints = registry.getEndpoints(ServiceKey.forRest(ServiceId.Index, fileStorage.node()));
if (endpoints.isEmpty()) {
throw new RuntimeException("No endpoints for node " + fileStorage.node());
}

View File

@@ -22,7 +22,6 @@ dependencies {
implementation project(':code:processes:ping-process')
implementation project(':code:processes:new-domain-process')
implementation project(':code:processes:converting-process')
implementation project(':code:processes:index-constructor-process')
implementation project(':code:common:config')
implementation project(':code:common:model')
@@ -34,7 +33,7 @@ dependencies {
implementation project(':third-party:commons-codec')
implementation project(':code:libraries:message-queue')
implementation project(':code:libraries:term-frequency-dict')
implementation project(':code:libraries:language-processing')
implementation project(':code:functions:link-graph:api')
implementation project(':code:functions:live-capture:api')

View File

@@ -5,7 +5,6 @@ import com.google.inject.Singleton;
import nu.marginalia.WmsaHome;
import nu.marginalia.converting.ConverterMain;
import nu.marginalia.crawl.CrawlerMain;
import nu.marginalia.index.IndexConstructorMain;
import nu.marginalia.livecrawler.LiveCrawlerMain;
import nu.marginalia.loading.LoaderMain;
import nu.marginalia.ndp.NdpMain;
@@ -57,7 +56,7 @@ public class ProcessSpawnerService {
LIVE_CRAWLER(LiveCrawlerMain.class),
CONVERTER(ConverterMain.class),
LOADER(LoaderMain.class),
INDEX_CONSTRUCTOR(IndexConstructorMain.class),
INDEX_CONSTRUCTOR("nu.marginalia.index.IndexConstructorMain"),
NDP(NdpMain.class),
EXPORT_TASKS(ExportTasksMain.class),
;
@@ -66,6 +65,9 @@ public class ProcessSpawnerService {
ProcessId(Class<? extends ProcessMainClass> mainClass) {
this.mainClass = mainClass.getName();
}
ProcessId(String mainClassFullName) {
this.mainClass = mainClassFullName;
}
List<String> envOpts() {
String variable = switch (this) {

View File

@@ -1,4 +1,4 @@
package nu.marginalia.executor;
package nu.marginalia.svc;
import com.google.inject.Inject;
import nu.marginalia.storage.FileStorageService;

View File

@@ -1,5 +1,5 @@
The execution subsystem is responsible for the execution of long running tasks on each
index node. It lives in the [executor-service](../services-core/executor-service) module.
index node. It lives in the [index-service](../services-core/index-service) module.
It accomplishes this using the [message queue and actor library](../libraries/message-queue/),
which permits program state to survive crashes and reboots.

View File

@@ -1,4 +1,4 @@
package nu.marginalia.executor;
package nu.marginalia.svc;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorage;

View File

@@ -41,7 +41,22 @@ public class DomSampleClient {
}
catch (StatusRuntimeException sre) {
if (sre.getStatus() != Status.NOT_FOUND) {
logger.error("Failed to fetch DOM sample");
logger.error("Failed to fetch DOM sample", sre);
}
return Optional.empty();
}
}
public Optional<RpcDomainSampleRequests> getSampleRequests(String domainName) {
try {
var val = channelPool.call(DomSampleApiGrpc.DomSampleApiBlockingStub::getSampleRequests)
.run(RpcDomainName.newBuilder().setDomainName(domainName).build());
return Optional.of(val);
}
catch (StatusRuntimeException sre) {
if (sre.getStatus() != Status.NOT_FOUND) {
logger.error("Failed to fetch DOM sample", sre);
}
return Optional.empty();
}

View File

@@ -7,6 +7,7 @@ option java_multiple_files=true;
service DomSampleApi {
rpc getSample(RpcDomainName) returns (RpcDomainSample) {}
rpc getSampleRequests(RpcDomainName) returns (RpcDomainSampleRequests) {}
rpc hasSample(RpcDomainName) returns (RpcBooleanRsp) {}
rpc getAllSamples(RpcDomainName) returns (stream RpcDomainSample) {}
}
@@ -19,10 +20,16 @@ message RpcBooleanRsp {
bool answer = 1;
}
message RpcDomainSampleRequests {
string domainName = 1;
string url = 2;
repeated RpcOutgoingRequest outgoingRequests = 5;
}
message RpcDomainSample {
string domainName = 1;
string url = 2;
string htmlSample = 3;
bytes htmlSampleZstd = 3;
bool accepted_popover = 4;
repeated RpcOutgoingRequest outgoingRequests = 5;
}

View File

@@ -31,6 +31,7 @@ dependencies {
implementation libs.jsoup
implementation libs.opencsv
implementation libs.slop
implementation libs.zstd
implementation libs.sqlite
implementation libs.bundles.slf4j
implementation libs.commons.lang3

View File

@@ -1,6 +1,8 @@
package nu.marginalia.domsample;
import com.github.luben.zstd.Zstd;
import com.google.inject.Inject;
import com.google.protobuf.ByteString;
import io.grpc.Status;
import io.grpc.stub.StreamObserver;
import nu.marginalia.api.domsample.*;
@@ -9,6 +11,7 @@ import nu.marginalia.service.server.DiscoverableService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.charset.StandardCharsets;
import java.util.List;
public class DomSampleGrpcService
@@ -42,7 +45,36 @@ public class DomSampleGrpcService
}
// Grab the first sample
RpcDomainSample.Builder response = convert(dbRecords.getFirst());
RpcDomainSample.Builder response = convertFullSample(dbRecords.getFirst());
responseObserver.onNext(response.build());
responseObserver.onCompleted();
}
catch (Exception e) {
logger.error("Error in getSample()", e);
responseObserver.onError(Status.INTERNAL.withCause(e).asRuntimeException());
}
}
@Override
public void getSampleRequests(RpcDomainName request, StreamObserver<RpcDomainSampleRequests> responseObserver) {
String domainName = request.getDomainName();
if (domainName.isBlank()) {
responseObserver.onError(Status.INVALID_ARGUMENT
.withDescription("Invalid domain name")
.asRuntimeException());
return;
}
try {
List<DomSampleDb.Sample> dbRecords = domSampleDb.getSamples(domainName);
if (dbRecords.isEmpty()) {
responseObserver.onError(Status.NOT_FOUND.withDescription("No sample found").asRuntimeException());
return;
}
// Grab the first sample
RpcDomainSampleRequests.Builder response = convertRequestData(dbRecords.getFirst());
responseObserver.onNext(response.build());
responseObserver.onCompleted();
@@ -87,7 +119,7 @@ public class DomSampleGrpcService
List<DomSampleDb.Sample> dbRecords = domSampleDb.getSamples(domainName);
for (var record : dbRecords) {
responseObserver.onNext(convert(record).build());
responseObserver.onNext(convertFullSample(record).build());
}
responseObserver.onCompleted();
@@ -98,12 +130,14 @@ public class DomSampleGrpcService
}
}
private RpcDomainSample.Builder convert(DomSampleDb.Sample dbSample) {
private RpcDomainSample.Builder convertFullSample(DomSampleDb.Sample dbSample) {
ByteString htmlZstd = ByteString.copyFrom(Zstd.compress(dbSample.sample().getBytes(StandardCharsets.UTF_8)));
var sampleBuilder = RpcDomainSample.newBuilder()
.setDomainName(dbSample.domain())
.setAcceptedPopover(dbSample.acceptedPopover())
.setHtmlSample(dbSample.sample());
.setHtmlSampleZstd(htmlZstd);
for (var req : dbSample.parseRequests()) {
sampleBuilder.addOutgoingRequestsBuilder()
@@ -120,4 +154,23 @@ public class DomSampleGrpcService
return sampleBuilder;
}
private RpcDomainSampleRequests.Builder convertRequestData(DomSampleDb.Sample dbSample) {
var sampleBuilder = RpcDomainSampleRequests.newBuilder()
.setDomainName(dbSample.domain());
for (var req : dbSample.parseRequests()) {
sampleBuilder.addOutgoingRequestsBuilder()
.setUrl(req.uri().toString())
.setMethod(switch (req.method().toUpperCase())
{
case "GET" -> RpcOutgoingRequest.RequestMethod.GET;
case "POST" -> RpcOutgoingRequest.RequestMethod.POST;
default -> RpcOutgoingRequest.RequestMethod.OTHER;
})
.setTimestamp(req.timestamp());
}
return sampleBuilder;
}
}
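
Since the sample HTML now travels as zstd-compressed bytes (htmlSampleZstd) rather than a plain string, a caller of getSample() has to decompress it; a sketch using zstd-jni (the library already used above), with the size-lookup helper assumed to be available in the version in use:

import com.github.luben.zstd.Zstd;
import java.nio.charset.StandardCharsets;

// Decode the compressed HTML sample back into a string on the client side.
class DomSampleDecoding {
    static String decodeHtmlSample(byte[] htmlSampleZstd) {
        // getFrameContentSize reads the uncompressed size from the zstd frame header
        int originalSize = (int) Zstd.getFrameContentSize(htmlSampleZstd);
        byte[] html = Zstd.decompress(htmlSampleZstd, originalSize);
        return new String(html, StandardCharsets.UTF_8);
    }
}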

View File

@@ -87,7 +87,7 @@ class FeedFetcherServiceTest extends AbstractModule {
bind(DomainCoordinator.class).to(LocalDomainCoordinator.class);
bind(HikariDataSource.class).toInstance(dataSource);
bind(ServiceRegistryIf.class).toInstance(Mockito.mock(ServiceRegistryIf.class));
bind(ServiceConfiguration.class).toInstance(new ServiceConfiguration(ServiceId.Executor, 1, "", "", 0, UUID.randomUUID()));
bind(ServiceConfiguration.class).toInstance(new ServiceConfiguration(ServiceId.Index, 1, "", "", 0, UUID.randomUUID()));
bind(Integer.class).annotatedWith(Names.named("wmsa-system-node")).toInstance(1);
}

View File

@@ -22,7 +22,6 @@ dependencies {
implementation project(':code:common:model')
implementation project(':code:common:config')
implementation project(':code:common:service')
implementation project(':code:index:query')
implementation project(':code:libraries:language-processing')
implementation libs.bundles.slf4j

View File

@@ -2,8 +2,8 @@ package nu.marginalia.api.searchquery;
import nu.marginalia.api.searchquery.model.query.SearchPhraseConstraint;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.api.searchquery.model.query.SpecificationLimit;
import nu.marginalia.api.searchquery.model.query.SpecificationLimitType;
import java.util.ArrayList;
import java.util.List;

View File

@@ -9,7 +9,7 @@ import nu.marginalia.api.searchquery.model.results.debug.DebugFactor;
import nu.marginalia.api.searchquery.model.results.debug.DebugFactorGroup;
import nu.marginalia.api.searchquery.model.results.debug.DebugTermFactorGroup;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.api.searchquery.model.query.QueryStrategy;
import nu.marginalia.model.EdgeUrl;
import java.util.ArrayList;
@@ -28,6 +28,7 @@ public class QueryProtobufCodec {
builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
builder.setHumanQuery(request.getHumanQuery());
builder.setLangIsoCode(query.langIsoCode);
builder.setNsfwFilterTierValue(request.getNsfwFilterTierValue());
@@ -76,6 +77,7 @@ public class QueryProtobufCodec {
builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
builder.setHumanQuery(humanQuery);
builder.setLangIsoCode(query.langIsoCode);
builder.setNsfwFilterTier(RpcIndexQuery.NSFW_FILTER_TIER.DANGER);
@@ -114,6 +116,7 @@ public class QueryProtobufCodec {
QueryStrategy.valueOf(request.getQueryStrategy()),
RpcTemporalBias.Bias.valueOf(request.getTemporalBias().getBias().name()),
NsfwFilterTier.fromCodedValue(request.getNsfwFilterTierValue()),
request.getLangIsoCode(),
request.getPagination().getPage()
);
}
@@ -304,7 +307,6 @@ public class QueryProtobufCodec {
IndexProtobufCodec.convertRpcQuery(specs.getQuery()),
specs.getDomainsList(),
specs.getSearchSetIdentifier(),
specs.getHumanQuery(),
IndexProtobufCodec.convertSpecLimit(specs.getQuality()),
IndexProtobufCodec.convertSpecLimit(specs.getYear()),
IndexProtobufCodec.convertSpecLimit(specs.getSize()),
@@ -336,7 +338,8 @@ public class QueryProtobufCodec {
.setPagination(RpcQsQueryPagination.newBuilder()
.setPage(params.page())
.setPageSize(Math.min(100, params.limits().getResultsTotal()))
.build());
.build())
.setLangIsoCode(params.langIsoCode());
if (params.nearDomain() != null)
builder.setNearDomain(params.nearDomain());

View File

@@ -1,19 +1,24 @@
package nu.marginalia.api.searchquery.model.query;
import java.util.*;
import java.util.List;
public class ProcessedQuery {
public final SearchSpecification specs;
public final List<String> searchTermsHuman;
public final String domain;
public final String langIsoCode;
public ProcessedQuery(SearchSpecification specs, List<String> searchTermsHuman, String domain) {
public ProcessedQuery(SearchSpecification specs,
List<String> searchTermsHuman,
String domain,
String langIsoCode) {
this.specs = specs;
this.searchTermsHuman = searchTermsHuman;
this.domain = domain;
this.langIsoCode = langIsoCode;
}
public ProcessedQuery(SearchSpecification justSpecs) {
this(justSpecs, List.of(), null);
this(justSpecs, List.of(), null, "en");
}
}

View File

@@ -2,8 +2,6 @@ package nu.marginalia.api.searchquery.model.query;
import nu.marginalia.api.searchquery.RpcQueryLimits;
import nu.marginalia.api.searchquery.RpcTemporalBias;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import javax.annotation.Nullable;
import java.util.List;
@@ -26,10 +24,11 @@ public record QueryParams(
QueryStrategy queryStrategy,
RpcTemporalBias.Bias temporalBias,
NsfwFilterTier filterTier,
String langIsoCode,
int page
)
{
public QueryParams(String query, RpcQueryLimits limits, String identifier, NsfwFilterTier filterTier) {
public QueryParams(String query, RpcQueryLimits limits, String identifier, NsfwFilterTier filterTier, String langIsoCode) {
this(query, null,
List.of(),
List.of(),
@@ -45,6 +44,7 @@ public record QueryParams(
QueryStrategy.AUTO,
RpcTemporalBias.Bias.NONE,
filterTier,
langIsoCode,
1 // page
);
}

View File

@@ -1,4 +1,4 @@
package nu.marginalia.index.query.limit;
package nu.marginalia.api.searchquery.model.query;
public enum QueryStrategy {
SENTENCE,

View File

@@ -2,8 +2,6 @@ package nu.marginalia.api.searchquery.model.query;
import nu.marginalia.api.searchquery.RpcQueryLimits;
import nu.marginalia.api.searchquery.RpcResultRankingParameters;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import javax.annotation.Nullable;
import java.util.List;
@@ -18,8 +16,6 @@ public class SearchSpecification {
public String searchSetIdentifier;
public final String humanQuery;
public SpecificationLimit quality;
public SpecificationLimit year;
public SpecificationLimit size;
@@ -35,7 +31,6 @@ public class SearchSpecification {
public SearchSpecification(SearchQuery query,
List<Integer> domains,
String searchSetIdentifier,
String humanQuery,
SpecificationLimit quality,
SpecificationLimit year,
SpecificationLimit size,
@@ -47,7 +42,6 @@ public class SearchSpecification {
this.query = query;
this.domains = domains;
this.searchSetIdentifier = searchSetIdentifier;
this.humanQuery = humanQuery;
this.quality = quality;
this.year = year;
this.size = size;
@@ -73,10 +67,6 @@ public class SearchSpecification {
return this.searchSetIdentifier;
}
public String getHumanQuery() {
return this.humanQuery;
}
public SpecificationLimit getQuality() {
return this.quality;
}
@@ -106,14 +96,13 @@ public class SearchSpecification {
}
public String toString() {
return "SearchSpecification(query=" + this.getQuery() + ", domains=" + this.getDomains() + ", searchSetIdentifier=" + this.getSearchSetIdentifier() + ", humanQuery=" + this.getHumanQuery() + ", quality=" + this.getQuality() + ", year=" + this.getYear() + ", size=" + this.getSize() + ", rank=" + this.getRank() + ", queryLimits=" + this.getQueryLimits() + ", queryStrategy=" + this.getQueryStrategy() + ", rankingParams=" + this.getRankingParams() + ")";
return "SearchSpecification(query=" + this.getQuery() + ", domains=" + this.getDomains() + ", searchSetIdentifier=" + this.getSearchSetIdentifier() + ", quality=" + this.getQuality() + ", year=" + this.getYear() + ", size=" + this.getSize() + ", rank=" + this.getRank() + ", queryLimits=" + this.getQueryLimits() + ", queryStrategy=" + this.getQueryStrategy() + ", rankingParams=" + this.getRankingParams() + ")";
}
public static class SearchSpecificationBuilder {
private SearchQuery query;
private List<Integer> domains;
private String searchSetIdentifier;
private String humanQuery;
private SpecificationLimit quality$value;
private boolean quality$set;
private SpecificationLimit year$value;
@@ -144,11 +133,6 @@ public class SearchSpecification {
return this;
}
public SearchSpecificationBuilder humanQuery(String humanQuery) {
this.humanQuery = humanQuery;
return this;
}
public SearchSpecificationBuilder quality(SpecificationLimit quality) {
this.quality$value = quality;
this.quality$set = true;
@@ -205,11 +189,7 @@ public class SearchSpecification {
if (!this.rank$set) {
rank$value = SpecificationLimit.none();
}
return new SearchSpecification(this.query, this.domains, this.searchSetIdentifier, this.humanQuery, quality$value, year$value, size$value, rank$value, this.queryLimits, this.queryStrategy, this.rankingParams);
}
public String toString() {
return "SearchSpecification.SearchSpecificationBuilder(query=" + this.query + ", domains=" + this.domains + ", searchSetIdentifier=" + this.searchSetIdentifier + ", humanQuery=" + this.humanQuery + ", quality$value=" + this.quality$value + ", year$value=" + this.year$value + ", size$value=" + this.size$value + ", rank$value=" + this.rank$value + ", queryLimits=" + this.queryLimits + ", queryStrategy=" + this.queryStrategy + ", rankingParams=" + this.rankingParams + ")";
return new SearchSpecification(this.query, this.domains, this.searchSetIdentifier, quality$value, year$value, size$value, rank$value, this.queryLimits, this.queryStrategy, this.rankingParams);
}
}
}

View File

@@ -1,4 +1,4 @@
package nu.marginalia.index.query.limit;
package nu.marginalia.api.searchquery.model.query;
public record SpecificationLimit(SpecificationLimitType type, int value) {
public boolean isNone() {

View File

@@ -1,4 +1,4 @@
package nu.marginalia.index.query.limit;
package nu.marginalia.api.searchquery.model.query;
public enum SpecificationLimitType {
NONE,

View File

@@ -1,56 +0,0 @@
package nu.marginalia.api.searchquery.model.results;
import nu.marginalia.api.searchquery.RpcResultRankingParameters;
import nu.marginalia.api.searchquery.model.compiled.CqDataInt;
import java.util.BitSet;
public class ResultRankingContext {
private final int docCount;
public final RpcResultRankingParameters params;
public final BitSet regularMask;
public final BitSet ngramsMask;
/** CqDataInt associated with frequency information of the terms in the query
* in the full index. The dataset is indexed by the compiled query. */
public final CqDataInt fullCounts;
/** CqDataInt associated with frequency information of the terms in the query
* in the full index. The dataset is indexed by the compiled query. */
public final CqDataInt priorityCounts;
public ResultRankingContext(int docCount,
RpcResultRankingParameters params,
BitSet ngramsMask,
BitSet regularMask,
CqDataInt fullCounts,
CqDataInt prioCounts)
{
this.docCount = docCount;
this.params = params;
this.ngramsMask = ngramsMask;
this.regularMask = regularMask;
this.fullCounts = fullCounts;
this.priorityCounts = prioCounts;
}
public int termFreqDocCount() {
return docCount;
}
@Override
public String toString() {
return "ResultRankingContext{" +
"docCount=" + docCount +
", params=" + params +
", regularMask=" + regularMask +
", ngramsMask=" + ngramsMask +
", fullCounts=" + fullCounts +
", priorityCounts=" + priorityCounts +
'}';
}
}

View File

@@ -34,6 +34,7 @@ message RpcQsQuery {
RpcQsQueryPagination pagination = 17;
NSFW_FILTER_TIER nsfwFilterTier = 18;
string langIsoCode = 19;
enum NSFW_FILTER_TIER {
NONE = 0;
@@ -88,6 +89,7 @@ message RpcIndexQuery {
RpcResultRankingParameters parameters = 12;
NSFW_FILTER_TIER nsfwFilterTier = 13;
string langIsoCode = 14;
enum NSFW_FILTER_TIER {
NONE = 0;

View File

@@ -3,7 +3,7 @@ package nu.marginalia.index.client;
import nu.marginalia.api.searchquery.IndexProtobufCodec;
import nu.marginalia.api.searchquery.model.query.SearchPhraseConstraint;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.api.searchquery.model.query.SpecificationLimit;
import org.junit.jupiter.api.Test;
import java.util.List;

View File

@@ -22,18 +22,13 @@ dependencies {
implementation project(':code:functions:nsfw-domain-filter')
implementation project(':code:functions:search-query:api')
implementation project(':code:index:query')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:term-frequency-dict')
implementation project(':third-party:porterstemmer')
implementation project(':third-party:openzim')
implementation project(':third-party:commons-codec')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:term-frequency-dict')
implementation project(':code:processes:converting-process:ft-keyword-extraction')
implementation libs.bundles.slf4j

View File

@@ -8,8 +8,8 @@ import nu.marginalia.api.searchquery.model.query.*;
import nu.marginalia.functions.searchquery.query_parser.QueryExpansion;
import nu.marginalia.functions.searchquery.query_parser.QueryParser;
import nu.marginalia.functions.searchquery.query_parser.token.QueryToken;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.api.searchquery.model.query.QueryStrategy;
import nu.marginalia.api.searchquery.model.query.SpecificationLimit;
import nu.marginalia.language.WordPatterns;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
@@ -34,8 +34,6 @@ public class QueryFactory {
this.queryExpansion = queryExpansion;
}
public ProcessedQuery createQuery(QueryParams params,
@Nullable RpcResultRankingParameters rankingParams) {
final var query = params.humanQuery();
@@ -153,7 +151,6 @@ public class QueryFactory {
var specsBuilder = SearchSpecification.builder()
.query(queryBuilder.build())
.humanQuery(query)
.quality(qualityLimit)
.year(year)
.size(size)
@@ -170,7 +167,7 @@ public class QueryFactory {
specs.query.searchTermsPriority.addAll(params.tacitPriority());
specs.query.searchTermsExclude.addAll(params.tacitExcludes());
return new ProcessedQuery(specs, searchTermsHuman, domain);
return new ProcessedQuery(specs, searchTermsHuman, domain, params.langIsoCode());
}
private void analyzeSearchTerm(List<String> problems, String str, String displayStr) {

View File

@@ -1,7 +1,7 @@
package nu.marginalia.functions.searchquery.query_parser;
import nu.marginalia.functions.searchquery.query_parser.token.QueryToken;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.api.searchquery.model.query.SpecificationLimit;
import nu.marginalia.language.WordPatterns;
import nu.marginalia.language.encoding.AsciiFlattener;
import nu.marginalia.util.transform_list.TransformList;

View File

@@ -1,7 +1,7 @@
package nu.marginalia.functions.searchquery.query_parser.token;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.api.searchquery.model.query.SpecificationLimit;
public sealed interface QueryToken {
String str();

View File

@@ -3,14 +3,9 @@ package nu.marginalia.query.svc;
import nu.marginalia.WmsaHome;
import nu.marginalia.api.searchquery.RpcQueryLimits;
import nu.marginalia.api.searchquery.RpcTemporalBias;
import nu.marginalia.api.searchquery.model.query.NsfwFilterTier;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.*;
import nu.marginalia.functions.searchquery.QueryFactory;
import nu.marginalia.functions.searchquery.query_parser.QueryExpansion;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.segmentation.NgramLexicon;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import org.junit.jupiter.api.Assertions;
@@ -60,6 +55,7 @@ public class QueryFactoryTest {
QueryStrategy.AUTO,
RpcTemporalBias.Bias.NONE,
NsfwFilterTier.OFF,
"en",
0), null).specs;
}
@@ -216,6 +212,12 @@ public class QueryFactoryTest {
}
@Test
public void testExpansion10() {
var subquery = parseAndGetSpecs("when was captain james cook born");
System.out.println(subquery);
}
@Test
public void testContractionWordNum() {
var subquery = parseAndGetSpecs("glove 80");
@@ -241,7 +243,6 @@ public class QueryFactoryTest {
Assertions.assertTrue(subquery.query.compiledQuery.contains(" bob "));
Assertions.assertFalse(subquery.query.compiledQuery.contains(" bob's "));
Assertions.assertEquals("\"bob's cars\"", subquery.humanQuery);
}
@Test

View File

@@ -22,8 +22,13 @@ dependencies {
implementation project(':code:libraries:array')
implementation project(':code:libraries:btree')
implementation project(':code:libraries:skiplist')
implementation project(':code:libraries:native')
implementation project(':code:libraries:random-write-funnel')
implementation project(':code:libraries:coded-sequence')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:message-queue')
implementation project(':code:common:db')
implementation project(':code:common:config')
@@ -32,11 +37,9 @@ dependencies {
implementation project(':code:common:service')
implementation project(':code:processes:converting-process:model')
implementation project(':code:processes:process-mq-api')
implementation project(':code:functions:search-query:api')
implementation project(':code:index:index-forward')
implementation project(':code:index:index-reverse')
implementation project(':code:index:query')
implementation project(':code:index:index-journal')
@@ -74,7 +77,7 @@ dependencies {
testImplementation 'org.testcontainers:mariadb:1.17.4'
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
testImplementation project(':code:libraries:test-helpers')
testImplementation project(':code:libraries:term-frequency-dict')
testImplementation project(':code:libraries:language-processing')
testImplementation project(':code:libraries:braille-block-punch-cards')
testImplementation project(':code:libraries:test-helpers')
}

View File

@@ -1,38 +0,0 @@
plugins {
id 'java'
id 'jvm-test-suite'
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
}
}
apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:libraries:array')
implementation project(':code:libraries:btree')
implementation project(':code:libraries:coded-sequence')
implementation project(':code:libraries:language-processing')
implementation project(':code:index:query')
implementation project(':code:index:index-journal')
implementation project(':code:common:model')
implementation project(':code:common:service')
implementation project(':code:processes:converting-process:model')
implementation libs.bundles.slf4j
implementation libs.prometheus
implementation libs.roaringbitmap
implementation libs.fastutil
implementation libs.trove
implementation libs.slop
testImplementation project(':code:libraries:test-helpers')
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}

View File

@@ -1,33 +0,0 @@
package nu.marginalia.index.forward;
import java.nio.file.Path;
public class ForwardIndexFileNames {
public static Path resolve(Path basePath, FileIdentifier identifier, FileVersion version) {
return switch (identifier) {
case DOC_ID -> switch (version) {
case NEXT -> basePath.resolve("fwd-doc-id.dat.next");
case CURRENT -> basePath.resolve("fwd-doc-id.dat");
};
case DOC_DATA -> switch (version) {
case NEXT -> basePath.resolve("fwd-doc-data.dat.next");
case CURRENT -> basePath.resolve("fwd-doc-data.dat");
};
case SPANS_DATA -> switch (version) {
case NEXT -> basePath.resolve("fwd-spans.dat.next");
case CURRENT -> basePath.resolve("fwd-spans.dat");
};
};
}
public enum FileVersion {
CURRENT,
NEXT
}
public enum FileIdentifier {
DOC_DATA,
SPANS_DATA,
DOC_ID
}
}

View File

@@ -1,59 +0,0 @@
package nu.marginalia.index.forward.spans;
import nu.marginalia.sequence.VarintCodedSequence;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
@SuppressWarnings("preview")
public class ForwardIndexSpansReader implements AutoCloseable {
private final FileChannel spansFileChannel;
public ForwardIndexSpansReader(Path spansFile) throws IOException {
this.spansFileChannel = (FileChannel) Files.newByteChannel(spansFile, StandardOpenOption.READ);
}
public DocumentSpans readSpans(Arena arena, long encodedOffset) throws IOException {
// Decode the size and offset from the encoded offset
long size = SpansCodec.decodeSize(encodedOffset);
long offset = SpansCodec.decodeStartOffset(encodedOffset);
// Allocate a buffer from the arena
var buffer = arena.allocate(size).asByteBuffer();
buffer.clear();
while (buffer.hasRemaining()) {
spansFileChannel.read(buffer, offset + buffer.position());
}
buffer.flip();
// Read the number of spans in the document
int count = buffer.get();
DocumentSpans ret = new DocumentSpans();
// Decode each span
while (count-- > 0) {
byte code = buffer.get();
short len = buffer.getShort();
ByteBuffer data = buffer.slice(buffer.position(), len);
ret.accept(code, new VarintCodedSequence(data));
// Reset the buffer position to the end of the span
buffer.position(buffer.position() + len);
}
return ret;
}
@Override
public void close() throws IOException {
spansFileChannel.close();
}
}

View File

@@ -1,52 +0,0 @@
package nu.marginalia.index.forward.spans;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
public class ForwardIndexSpansWriter implements AutoCloseable {
private final FileChannel outputChannel;
private final ByteBuffer work = ByteBuffer.allocate(32);
private long stateStartOffset = -1;
private int stateLength = -1;
public ForwardIndexSpansWriter(Path outputFileSpansData) throws IOException {
this.outputChannel = (FileChannel) Files.newByteChannel(outputFileSpansData, StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE);
}
public void beginRecord(int count) throws IOException {
stateStartOffset = outputChannel.position();
stateLength = 0;
work.clear();
work.put((byte) count);
work.flip();
while (work.hasRemaining())
stateLength += outputChannel.write(work);
}
public void writeSpan(byte spanCode, ByteBuffer sequenceData) throws IOException {
work.clear();
work.put(spanCode);
work.putShort((short) sequenceData.remaining());
work.flip();
while (work.hasRemaining() || sequenceData.hasRemaining()) {
stateLength += (int) outputChannel.write(new ByteBuffer[]{work, sequenceData});
}
}
public long endRecord() {
return SpansCodec.encode(stateStartOffset, stateLength);
}
@Override
public void close() throws IOException {
outputChannel.close();
}
}

View File

@@ -1,17 +0,0 @@
package nu.marginalia.index.forward.spans;
public class SpansCodec {
public static long encode(long startOffset, long size) {
assert size < 0x1000_0000L : "Size must be less than 2^28";
return startOffset << 28 | (size & 0xFFF_FFFFL);
}
public static long decodeStartOffset(long encoded) {
return encoded >>> 28;
}
public static long decodeSize(long encoded) {
return encoded & 0x0FFF_FFFFL;
}
}

View File

@@ -1,21 +0,0 @@
# Forward Index
The forward index contains a mapping from document id to various forms of document metadata.
In practice, the forward index consists of two files: an `id` file and a `data` file.
The `id` file contains a sorted list of document ids, and the `data` file contains a
fixed-size metadata record for each document id, in the same order as the `id` file.
Each record contains a binary encoded [DocumentMetadata](../../common/model/java/nu/marginalia/model/idx/DocumentMetadata.java) object,
as well as a [HtmlFeatures](../../common/model/java/nu/marginalia/model/crawl/HtmlFeature.java) bitmask.
Unlike the reverse index, the forward index is not split into two tiers. The data is kept in the
same order as in the source data, and the cardinality of the document IDs is assumed to fit in memory,
so the index is relatively easy to construct.
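To illustrate the lookup path this layout enables, here is a minimal sketch (not the actual `ForwardIndexReader`); the two-longs-per-record layout and field order are assumptions made for the example:

```java
// Illustrative sketch: binary-search the sorted id file, then read the fixed-size
// record at the same ordinal in the data file. The record layout is assumed here.
long lookupMetadata(long[] ids, java.nio.LongBuffer data, long docId) {
    int lo = 0, hi = ids.length - 1;
    while (lo <= hi) {
        int mid = (lo + hi) >>> 1;
        if (ids[mid] < docId) lo = mid + 1;
        else if (ids[mid] > docId) hi = mid - 1;
        else return data.get(2 * mid);   // assumed: DocumentMetadata word for this document
    }
    return -1; // document id not present
}
```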
## Central Classes
* [ForwardIndexConverter](java/nu/marginalia/index/forward/construction/ForwardIndexConverter.java) constructs the index.
* [ForwardIndexReader](java/nu/marginalia/index/forward/ForwardIndexReader.java) interrogates the index.

View File

@@ -14,6 +14,7 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:libraries:coded-sequence')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:array')
implementation project(':code:common:model')
implementation project(':code:processes:converting-process:model')

View File

@@ -2,11 +2,10 @@ package nu.marginalia.index.journal;
import nu.marginalia.slop.SlopTable;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.*;
public record IndexJournal(Path journalDir) {
@@ -47,4 +46,21 @@ public record IndexJournal(Path journalDir) {
return instances;
}
public Set<String> languages() {
try {
Set<String> languages = new HashSet<>();
for (var instance : pages()) {
try (var slopTable = new SlopTable(instance.baseDir(), instance.page())) {
languages.addAll(instance.openLanguageIsoCode(slopTable).getDictionary());
}
}
return languages;
}
catch (IOException ex) {
throw new RuntimeException("Failed to read langauges from index journal");
}
}
}
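For context, a minimal sketch (an assumption, not part of this change) of how the language set might be consumed downstream to build one word lexicon per language, following the per-language file naming used by the perf-test harness later in this diff (`rev-words-en.dat`):

static List<WordLexicon> lexiconsFor(IndexJournal journal, Path indexDir) {
    // One lexicon per language recorded in the journal; the file name pattern is an assumption.
    List<WordLexicon> lexicons = new ArrayList<>();
    for (String lang : journal.languages()) {
        lexicons.add(new WordLexicon(lang, indexDir.resolve("rev-words-" + lang + ".dat")));
    }
    return lexicons;
}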

View File

@@ -6,17 +6,22 @@ import nu.marginalia.slop.column.array.ByteArrayColumn;
import nu.marginalia.slop.column.array.LongArrayColumn;
import nu.marginalia.slop.column.primitive.IntColumn;
import nu.marginalia.slop.column.primitive.LongColumn;
import nu.marginalia.slop.column.string.EnumColumn;
import nu.marginalia.slop.desc.StorageType;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
public record IndexJournalPage(Path baseDir, int page) {
public static IntColumn features = new IntColumn("features", StorageType.PLAIN);
public static IntColumn size = new IntColumn("size", StorageType.PLAIN);
public static LongColumn combinedId = new LongColumn("combinedId", StorageType.PLAIN);
public static LongColumn documentMeta = new LongColumn("documentMeta", StorageType.PLAIN);
public static EnumColumn languageIsoCode = new EnumColumn("languageIsoCode", StandardCharsets.US_ASCII, StorageType.PLAIN);
public static LongArrayColumn termIds = new LongArrayColumn("termIds", StorageType.ZSTD);
public static ByteArrayColumn termMeta = new ByteArrayColumn("termMetadata", StorageType.ZSTD);
public static VarintCodedSequenceArrayColumn positions = new VarintCodedSequenceArrayColumn("termPositions", StorageType.ZSTD);
@@ -24,6 +29,7 @@ public record IndexJournalPage(Path baseDir, int page) {
public static ByteArrayColumn spanCodes = new ByteArrayColumn("spanCodes", StorageType.ZSTD);
public static VarintCodedSequenceArrayColumn spans = new VarintCodedSequenceArrayColumn("spans", StorageType.ZSTD);
public IndexJournalPage {
if (!baseDir.toFile().isDirectory()) {
throw new IllegalArgumentException("Invalid base directory: " + baseDir);
@@ -46,6 +52,9 @@ public record IndexJournalPage(Path baseDir, int page) {
return size.open(table);
}
public EnumColumn.Reader openLanguageIsoCode(SlopTable table) throws IOException {
return languageIsoCode.open(table);
}
public LongArrayColumn.Reader openTermIds(SlopTable table) throws IOException {
return termIds.open(table);

View File

@@ -1,6 +1,6 @@
package nu.marginalia.index.journal;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.language.keywords.KeywordHasher;
import nu.marginalia.model.processed.SlopDocumentRecord;
import nu.marginalia.sequence.slop.VarintCodedSequenceArrayColumn;
import nu.marginalia.slop.SlopTable;
@@ -8,6 +8,7 @@ import nu.marginalia.slop.column.array.ByteArrayColumn;
import nu.marginalia.slop.column.array.LongArrayColumn;
import nu.marginalia.slop.column.primitive.IntColumn;
import nu.marginalia.slop.column.primitive.LongColumn;
import nu.marginalia.slop.column.string.EnumColumn;
import java.io.IOException;
import java.nio.file.Files;
@@ -27,8 +28,7 @@ public class IndexJournalSlopWriter extends SlopTable {
private final VarintCodedSequenceArrayColumn.Writer spansWriter;
private final ByteArrayColumn.Writer spanCodesWriter;
private static final MurmurHash3_128 hash = new MurmurHash3_128();
private final EnumColumn.Writer languagesWriter;
public IndexJournalSlopWriter(Path dir, int page) throws IOException {
@@ -50,14 +50,17 @@ public class IndexJournalSlopWriter extends SlopTable {
spanCodesWriter = IndexJournalPage.spanCodes.create(this);
spansWriter = IndexJournalPage.spans.create(this);
languagesWriter = IndexJournalPage.languageIsoCode.create(this);
}
public void put(long combinedId, SlopDocumentRecord.KeywordsProjection keywordsProjection) throws IOException {
public void put(long combinedId, SlopDocumentRecord.KeywordsProjection keywordsProjection, KeywordHasher hasher) throws IOException {
combinedIdWriter.put(combinedId);
featuresWriter.put(keywordsProjection.htmlFeatures());
sizeWriter.put(keywordsProjection.length());
documentMetaWriter.put(keywordsProjection.documentMetadata());
languagesWriter.put(keywordsProjection.languageIsoCode());
// -- write keyword data --
@@ -66,7 +69,7 @@ public class IndexJournalSlopWriter extends SlopTable {
// termIds are the special hashes of the keywords
long[] termIds = new long[keywordsProjection.words().size()];
for (int i = 0; i < termIds.length; i++) {
termIds[i] = hash.hashKeyword(keywords.get(i));
termIds[i] = hasher.hashKeyword(keywords.get(i));
}
termIdsWriter.put(termIds);
@@ -87,6 +90,7 @@ public class IndexJournalSlopWriter extends SlopTable {
termIdsWriter.close();
termMetadataWriter.close();
termPositionsWriter.close();
languagesWriter.close();
spansWriter.close();
spanCodesWriter.close();
}

View File

@@ -0,0 +1,51 @@
plugins {
id 'java'
id 'application'
id 'jvm-test-suite'
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
}
}
application {
mainClass = 'nu.marginalia.index.perftest.PerfTestMain'
}
apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:common:config')
implementation project(':code:common:db')
implementation project(':code:libraries:array')
implementation project(':code:libraries:native')
implementation project(':code:libraries:btree')
implementation project(':code:libraries:language-processing')
implementation project(':code:common:linkdb')
implementation project(':code:index')
implementation project(':third-party:commons-codec')
implementation project(':code:functions:search-query')
implementation project(':code:functions:search-query:api')
implementation libs.slop
implementation libs.roaringbitmap
implementation libs.bundles.slf4j
implementation libs.guava
libs.bundles.grpc.get().each {
implementation dependencies.create(it) {
exclude group: 'com.google.guava'
}
}
implementation libs.notnull
implementation libs.trove
implementation libs.fastutil
implementation libs.bundles.gson
implementation libs.bundles.mariadb
}

View File

@@ -0,0 +1,262 @@
package nu.marginalia.index.perftest;
import nu.marginalia.ffi.LinuxSystemCalls;
import nu.marginalia.uring.UringFileReader;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.stream.LongStream;
public class IoPatternsMain {
static void testBuffered(int sz, int small, int large, int iters) {
try {
Path largeFile = Path.of("/home/vlofgren/largefile.dat");
long fileSize = Files.size(largeFile);
Random r = new Random();
List<MemorySegment> segments = new ArrayList<>();
for (int i = 0; i < sz; i++) {
if (small == large) {
segments.add(Arena.ofAuto().allocate(small));
}
else {
segments.add(Arena.ofAuto().allocate(r.nextInt(small, large)));
}
}
List<Long> offsets = new ArrayList<>();
long[] samples = new long[1000];
int si = 0;
try (UringFileReader reader = new UringFileReader(largeFile, false)) {
for (int iter = 0; iter < iters; ) {
if (si == samples.length) {
Arrays.sort(samples);
double p1 = samples[10] / 1_000.;
double p10 = samples[100] / 1_000.;
double p90 = samples[900] / 1_000.;
double p99 = samples[990] / 1_000.;
double avg = LongStream.of(samples).average().getAsDouble() / 1000.;
System.out.println("B"+"\t"+avg+"\t"+p1 + " " + p10 + " " + p90 + " " + p99);
si = 0;
iter++;
}
offsets.clear();
for (int i = 0; i < sz; i++) {
offsets.add(r.nextLong(0, fileSize - 256));
}
long st = System.nanoTime();
reader.read(segments, offsets);
long et = System.nanoTime();
samples[si++] = et - st;
}
}
}
catch (IOException e) {
e.printStackTrace();
}
}
static void testBufferedPread(int sz, int iters) {
try {
Path largeFile = Path.of("/home/vlofgren/largefile.dat");
long fileSize = Files.size(largeFile);
Random r = new Random();
List<MemorySegment> segments = new ArrayList<>();
for (int i = 0; i < sz; i++) {
segments.add(Arena.ofAuto().allocate(r.nextInt(24, 256)));
}
List<Long> offsets = new ArrayList<>();
long[] samples = new long[1000];
int si = 0;
int fd = -1;
try {
fd = LinuxSystemCalls.openBuffered(largeFile);
LinuxSystemCalls.fadviseRandom(fd);
for (int iter = 0; iter < iters; ) {
if (si == samples.length) {
Arrays.sort(samples);
double p1 = samples[10] / 1_000.;
double p10 = samples[100] / 1_000.;
double p90 = samples[900] / 1_000.;
double p99 = samples[990] / 1_000.;
double avg = LongStream.of(samples).average().getAsDouble() / 1000.;
System.out.println("BP"+"\t"+avg+"\t"+p1 + " " + p10 + " " + p90 + " " + p99);
si = 0;
iter++;
}
offsets.clear();
for (int i = 0; i < sz; i++) {
offsets.add(r.nextLong(0, fileSize - 256));
}
long st = System.nanoTime();
for (int i = 0; i < sz; i++) {
LinuxSystemCalls.readAt(fd, segments.get(i), offsets.get(i));
}
long et = System.nanoTime();
samples[si++] = et - st;
}
}
finally {
LinuxSystemCalls.closeFd(fd);
}
}
catch (IOException e) {
e.printStackTrace();
}
}
static void testDirect(int blockSize, int sz, int iters) {
try {
Path largeFile = Path.of("/home/vlofgren/largefile.dat");
int fileSizeBlocks = (int) ((Files.size(largeFile) & -blockSize) / blockSize);
Random r = new Random();
List<MemorySegment> segments = new ArrayList<>();
for (int i = 0; i < sz; i++) {
segments.add(Arena.ofAuto().allocate(blockSize, blockSize));
}
List<Long> offsets = new ArrayList<>();
long[] samples = new long[1000];
int si = 0;
try (UringFileReader reader = new UringFileReader(largeFile, true)) {
for (int iter = 0; iter < iters; ) {
if (si == samples.length) {
Arrays.sort(samples);
double p1 = samples[10] / 1_000.;
double p10 = samples[100] / 1_000.;
double p90 = samples[900] / 1_000.;
double p99 = samples[990] / 1_000.;
double avg = LongStream.of(samples).average().getAsDouble() / 1000.;
System.out.println("DN"+blockSize+"\t"+avg+"\t"+p1 + " " + p10 + " " + p90 + " " + p99);
si = 0;
iter++;
}
offsets.clear();
for (int i = 0; i < sz; i++) {
offsets.add(blockSize * r.nextLong(0, fileSizeBlocks));
}
long st = System.nanoTime();
reader.read(segments, offsets);
long et = System.nanoTime();
samples[si++] = et - st;
}
}
}
catch (IOException e) {
e.printStackTrace();
}
}
static void testDirect1(int blockSize, int iters) {
try {
Path largeFile = Path.of("/home/vlofgren/largefile.dat");
int fileSizeBlocks = (int) ((Files.size(largeFile) & -blockSize) / blockSize);
Random r = new Random();
MemorySegment segment = Arena.global().allocate(blockSize, blockSize);
long[] samples = new long[1000];
int si = 0;
int fd = LinuxSystemCalls.openDirect(largeFile);
if (fd < 0) {
throw new IOException("open failed");
}
try {
for (int iter = 0; iter < iters; ) {
if (si == samples.length) {
Arrays.sort(samples);
double p1 = samples[10] / 1_000.;
double p10 = samples[100] / 1_000.;
double p90 = samples[900] / 1_000.;
double p99 = samples[990] / 1_000.;
double avg = LongStream.of(samples).average().getAsDouble() / 1000.;
System.out.println("D1"+blockSize+"\t"+avg+"\t"+p1 + " " + p10 + " " + p90 + " " + p99);
si = 0;
iter++;
}
long st = System.nanoTime();
int ret;
long readOffset = blockSize * r.nextLong(0, fileSizeBlocks);
if (blockSize != (ret = LinuxSystemCalls.readAt(fd, segment, readOffset))) {
throw new IOException("pread failed: " + ret);
}
long et = System.nanoTime();
samples[si++] = et - st;
}
}
finally {
LinuxSystemCalls.closeFd(fd);
}
}
catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) throws Exception {
// Thread.ofPlatform().start(() -> testBuffered(128, 32, 65536,1000));
Thread.ofPlatform().start(() -> testDirect(8192*4, 128,1000));
// Thread.ofPlatform().start(() -> testBuffered(128, 1000));
// Thread.ofPlatform().start(() -> testBuffered(128, 1000));
// Thread.ofPlatform().start(() -> testBuffered(128, 1000));
// Thread.ofPlatform().start(() -> testBufferedPread(128, 1000));
// Thread.ofPlatform().start(() -> testDirect1(1024, 1000));
// Thread.ofPlatform().start(() -> testDirect1(1024, 1000));
// Thread.ofPlatform().start(() -> testDirect1(1024, 1000));
// Thread.ofPlatform().start(() -> testDirect1(1024*1024, 1000));
// Thread.ofPlatform().start(() -> testDirect1(1024*1024, 1000));
// Thread.ofPlatform().start(() -> testDirect(512, 512,1000));
// Thread.ofPlatform().start(() -> testDirect(512, 512,1000));
// Thread.ofPlatform().start(() -> testDirect(512, 512,1000));
// Thread.ofPlatform().start(() -> testDirect(512, 100));
// Thread.ofPlatform().start(() -> testDirect(512, 100));
// Thread.ofPlatform().start(() -> testDirect(512, 100));
// Thread.ofPlatform().start(() -> testDirect(512, 100));
// Thread.ofPlatform().start(() -> testBuffered(512, 1000));
// Thread.ofPlatform().start(() -> testBuffered(512, 1000));
// Thread.ofPlatform().start(() -> testBuffered(512, 1000));
// Thread.ofPlatform().start(() -> testBuffered(512, 1000));
// Thread.ofPlatform().start(() -> testBuffered(100));
// Thread.ofPlatform().start(() -> testBuffered(100));
for (;;);
// testBuffered(100);
}
}

View File

@@ -0,0 +1,307 @@
package nu.marginalia.index.perftest;
import gnu.trove.list.array.TLongArrayList;
import nu.marginalia.api.searchquery.RpcQueryLimits;
import nu.marginalia.api.searchquery.model.query.NsfwFilterTier;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.results.PrototypeRankingParameters;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.functions.searchquery.QueryFactory;
import nu.marginalia.functions.searchquery.query_parser.QueryExpansion;
import nu.marginalia.index.CombinedIndexReader;
import nu.marginalia.index.IndexQueryExecution;
import nu.marginalia.index.StatefulIndex;
import nu.marginalia.index.forward.ForwardIndexReader;
import nu.marginalia.index.model.CombinedDocIdList;
import nu.marginalia.index.model.SearchContext;
import nu.marginalia.index.results.DomainRankingOverrides;
import nu.marginalia.index.results.IndexResultRankingService;
import nu.marginalia.index.reverse.FullReverseIndexReader;
import nu.marginalia.index.reverse.PrioReverseIndexReader;
import nu.marginalia.index.reverse.WordLexicon;
import nu.marginalia.index.reverse.query.IndexQuery;
import nu.marginalia.index.searchset.SearchSetAny;
import nu.marginalia.language.keywords.KeywordHasher;
import nu.marginalia.linkdb.docs.DocumentDbReader;
import nu.marginalia.segmentation.NgramLexicon;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeoutException;
public class PerfTestMain {
static Duration warmupTime = Duration.ofMinutes(1);
static Duration runTime = Duration.ofMinutes(10);
public static void main(String[] args) {
if (args.length != 4) {
System.err.println("Arguments: home-dir index-dir query");
System.exit(255);
}
try {
Path indexDir = Paths.get(args[0]);
if (!Files.isDirectory(indexDir)) {
System.err.println("Index directory is not a directory");
System.exit(255);
}
Path homeDir = Paths.get(args[1]);
String scenario = args[2];
String query = args[3];
switch (scenario) {
case "valuation" -> runValuation(indexDir, homeDir, query);
case "lookup" -> runLookup(indexDir, homeDir, query);
case "execution" -> runExecution(indexDir, homeDir, query);
}
System.exit(0);
}
catch (NumberFormatException e) {
System.err.println("Arguments: data-dir index-dir query");
System.exit(255);
}
catch (Exception ex) {
System.err.println("Error during testing");
ex.printStackTrace();
System.exit(255);
}
System.out.println(Arrays.toString(args));
}
private static CombinedIndexReader createCombinedIndexReader(Path indexDir) throws IOException {
return new CombinedIndexReader(
new ForwardIndexReader(
indexDir.resolve("ir/fwd-doc-id.dat"),
indexDir.resolve("ir/fwd-doc-data.dat"),
indexDir.resolve("ir/fwd-spans.dat")
),
new FullReverseIndexReader(
"full",
List.of(new WordLexicon("en", indexDir.resolve("ir/rev-words-en.dat"))),
indexDir.resolve("ir/rev-docs.dat"),
indexDir.resolve("ir/rev-positions.dat")
),
new PrioReverseIndexReader(
"prio",
List.of(new WordLexicon("en", indexDir.resolve("ir/rev-words-prio-en.dat"))),
indexDir.resolve("ir/rev-prio-docs.dat")
)
);
}
private static IndexResultRankingService createIndexResultRankingService(Path indexDir, CombinedIndexReader combinedIndexReader) throws IOException, SQLException {
return new IndexResultRankingService(
new DocumentDbReader(indexDir.resolve("ldbr/documents.db")),
new StatefulIndex(combinedIndexReader),
new DomainRankingOverrides(null, Path.of("xxxx"))
);
}
static QueryFactory createQueryFactory(Path homeDir) throws IOException {
return new QueryFactory(
new QueryExpansion(
new TermFrequencyDict(homeDir.resolve("model/tfreq-new-algo3.bin")),
new NgramLexicon()
)
);
}
public static void runValuation(Path homeDir,
Path indexDir,
String rawQuery) throws IOException, SQLException, TimeoutException {
CombinedIndexReader indexReader = createCombinedIndexReader(indexDir);
QueryFactory queryFactory = createQueryFactory(homeDir);
IndexResultRankingService rankingService = createIndexResultRankingService(indexDir, indexReader);
var queryLimits = RpcQueryLimits.newBuilder()
.setTimeoutMs(10_000)
.setResultsTotal(1000)
.setResultsByDomain(10)
.setFetchSize(4096)
.build();
SearchSpecification parsedQuery = queryFactory.createQuery(new QueryParams(rawQuery, queryLimits, "NONE", NsfwFilterTier.OFF, "en"), PrototypeRankingParameters.sensibleDefaults()).specs;
System.out.println("Query compiled to: " + parsedQuery.query.compiledQuery);
var rankingContext = SearchContext.create(indexReader, new KeywordHasher.AsciiIsh(), parsedQuery, new SearchSetAny());
List<IndexQuery> queries = indexReader.createQueries(rankingContext);
TLongArrayList allResults = new TLongArrayList();
LongQueryBuffer buffer = new LongQueryBuffer(512);
for (var query : queries) {
while (query.hasMore() && allResults.size() < 512 ) {
query.getMoreResults(buffer);
allResults.addAll(buffer.copyData());
}
if (allResults.size() >= 512)
break;
}
allResults.sort();
if (allResults.size() > 512) {
allResults.subList(512, allResults.size()).clear();
}
var rankingData = rankingService.prepareRankingData(rankingContext, new CombinedDocIdList(allResults.toArray()));
int sum = 0;
Instant runEndTime = Instant.now().plus(runTime);
Instant runStartTime = Instant.now();
int sum2 = 0;
List<Double> times = new ArrayList<>();
int iter;
for (iter = 0;; iter++) {
long start = System.nanoTime();
sum2 += rankingService.rankResults(rankingContext, rankingData, false).size();
long end = System.nanoTime();
times.add((end - start)/1_000_000.);
if ((iter % 100) == 0) {
if (Instant.now().isAfter(runEndTime)) {
break;
}
if (times.size() > 100) {
double[] timesSample = times.stream().mapToDouble(Double::doubleValue).skip(times.size() - 100).sorted().toArray();
System.out.format("P1: %f P10: %f, P90: %f, P99: %f\n", timesSample[1], timesSample[10], timesSample[90], timesSample[99]);
}
System.out.println(Duration.between(runStartTime, Instant.now()).toMillis() / 1000. + " best times: " + (allResults.size() / 512.) * times.stream().mapToDouble(Double::doubleValue).sorted().limit(3).average().orElse(-1));
}
}
System.out.println("Benchmark complete after " + iter + " iters!");
System.out.println("Best times: " + (allResults.size() / 512.) * times.stream().mapToDouble(Double::doubleValue).sorted().limit(3).average().orElse(-1));
System.out.println("Warmup sum: " + sum);
System.out.println("Main sum: " + sum2);
System.out.println(rankingData.size());
}
public static void runExecution(Path homeDir,
Path indexDir,
String rawQuery) throws IOException, SQLException, InterruptedException {
CombinedIndexReader indexReader = createCombinedIndexReader(indexDir);
QueryFactory queryFactory = createQueryFactory(homeDir);
IndexResultRankingService rankingService = createIndexResultRankingService(indexDir, indexReader);
var queryLimits = RpcQueryLimits.newBuilder()
.setTimeoutMs(50)
.setResultsTotal(1000)
.setResultsByDomain(10)
.setFetchSize(4096)
.build();
SearchSpecification parsedQuery = queryFactory.createQuery(new QueryParams(rawQuery, queryLimits, "NONE", NsfwFilterTier.OFF, "en"), PrototypeRankingParameters.sensibleDefaults()).specs;
System.out.println("Query compiled to: " + parsedQuery.query.compiledQuery);
System.out.println("Running warmup loop!");
int sum = 0;
Instant runEndTime = Instant.now().plus(runTime);
Instant runStartTime = Instant.now();
int sum2 = 0;
List<Double> rates = new ArrayList<>();
List<Double> times = new ArrayList<>();
int iter;
for (iter = 0;; iter++) {
var execution = new IndexQueryExecution(indexReader, rankingService, SearchContext.create(indexReader, new KeywordHasher.AsciiIsh(), parsedQuery, new SearchSetAny()), 1);
long start = System.nanoTime();
execution.run();
long end = System.nanoTime();
sum2 += execution.itemsProcessed();
rates.add(execution.itemsProcessed() / ((end - start)/1_000_000_000.));
times.add((end - start)/1_000_000.);
indexReader.reset();
if ((iter % 100) == 0) {
if (Instant.now().isAfter(runEndTime)) {
break;
}
if (times.size() > 100) {
double[] timesSample = times.stream().mapToDouble(Double::doubleValue).skip(times.size() - 100).sorted().toArray();
System.out.format("P1: %f P10: %f, P90: %f, P99: %f\n", timesSample[1], timesSample[10], timesSample[90], timesSample[99]);
}
System.out.println(Duration.between(runStartTime, Instant.now()).toMillis() / 1000. + " best rates: " + rates.stream().mapToDouble(Double::doubleValue).map(i -> -i).sorted().map(i -> -i).limit(3).average().orElse(-1));
}
}
System.out.println("Benchmark complete after " + iter + " iters!");
System.out.println("Best counts: " + rates.stream().mapToDouble(Double::doubleValue).map(i -> -i).sorted().map(i -> -i).limit(3).average().orElse(-1));
System.out.println("Warmup sum: " + sum);
System.out.println("Main sum: " + sum2);
}
public static void runLookup(Path homeDir,
Path indexDir,
String rawQuery) throws IOException, SQLException
{
CombinedIndexReader indexReader = createCombinedIndexReader(indexDir);
QueryFactory queryFactory = createQueryFactory(homeDir);
var queryLimits = RpcQueryLimits.newBuilder()
.setTimeoutMs(10_000)
.setResultsTotal(1000)
.setResultsByDomain(10)
.setFetchSize(4096)
.build();
SearchSpecification parsedQuery = queryFactory.createQuery(new QueryParams(rawQuery, queryLimits, "NONE", NsfwFilterTier.OFF, "en"), PrototypeRankingParameters.sensibleDefaults()).specs;
System.out.println("Query compiled to: " + parsedQuery.query.compiledQuery);
SearchContext searchContext = SearchContext.create(indexReader, new KeywordHasher.AsciiIsh(), parsedQuery, new SearchSetAny());
Instant runEndTime = Instant.now().plus(runTime);
LongQueryBuffer buffer = new LongQueryBuffer(512);
int sum1 = 0;
int iter;
Instant runStartTime = Instant.now();
int sum2 = 0;
List<Double> times = new ArrayList<>();
for (iter = 0;; iter++) {
indexReader.reset();
List<IndexQuery> queries = indexReader.createQueries(searchContext);
long start = System.nanoTime();
for (var query : queries) {
while (query.hasMore()) {
query.getMoreResults(buffer);
sum1 += buffer.end;
buffer.reset();
}
}
long end = System.nanoTime();
times.add((end - start)/1_000_000_000.);
if ((iter % 10) == 0) {
if (Instant.now().isAfter(runEndTime)) {
break;
}
if (times.size() > 100) {
double[] timesSample = times.stream().mapToDouble(Double::doubleValue).skip(times.size() - 100).sorted().toArray();
System.out.format("P1: %f P10: %f, P90: %f, P99: %f\n", timesSample[1], timesSample[10], timesSample[90], timesSample[99]);
}
System.out.println(Duration.between(runStartTime, Instant.now()).toMillis() / 1000. + " best times: " + times.stream().mapToDouble(Double::doubleValue).sorted().limit(3).average().orElse(-1));
}
}
System.out.println("Benchmark complete after " + iter + " iters!");
System.out.println("Best times: " + times.stream().mapToDouble(Double::doubleValue).sorted().limit(3).average().orElse(-1));
System.out.println("Warmup sum: " + sum1);
System.out.println("Main sum: " + sum2);
}
}

View File

@@ -1,41 +0,0 @@
plugins {
id 'java'
id 'jvm-test-suite'
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
}
}
apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:libraries:array')
implementation project(':code:libraries:btree')
implementation project(':code:libraries:coded-sequence')
implementation project(':code:libraries:random-write-funnel')
implementation project(':code:index:query')
implementation project(':code:index:index-journal')
implementation project(':code:common:model')
implementation project(':code:common:service')
implementation project(':code:processes:converting-process:model')
implementation project(':third-party:parquet-floor')
implementation project(':third-party:commons-codec')
implementation libs.bundles.slf4j
implementation libs.slop
implementation libs.fastutil
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
testImplementation project(':code:libraries:test-helpers')
}

View File

@@ -1,69 +0,0 @@
package nu.marginalia.index;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.EntrySource;
import static java.lang.Math.min;
public class FullIndexEntrySource implements EntrySource {
private final String name;
private final BTreeReader reader;
int pos;
int endOffset;
final int entrySize;
private final long wordId;
public FullIndexEntrySource(String name,
BTreeReader reader,
int entrySize,
long wordId) {
this.name = name;
this.reader = reader;
this.entrySize = entrySize;
this.wordId = wordId;
pos = 0;
endOffset = pos + entrySize * reader.numEntries();
}
@Override
public void skip(int n) {
pos += n;
}
@Override
public void read(LongQueryBuffer buffer) {
buffer.reset();
buffer.end = min(buffer.end, endOffset - pos);
reader.readData(buffer.data, buffer.end, pos);
pos += buffer.end;
destagger(buffer);
buffer.uniq();
}
private void destagger(LongQueryBuffer buffer) {
if (entrySize == 1)
return;
for (int ri = entrySize, wi=1; ri < buffer.end ; ri+=entrySize, wi++) {
buffer.data.set(wi, buffer.data.get(ri));
}
buffer.end /= entrySize;
}
@Override
public boolean hasMore() {
return pos < endOffset;
}
@Override
public String indexName() {
return name + ":" + Long.toHexString(wordId);
}
}

View File

@@ -1,188 +0,0 @@
package nu.marginalia.index;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.positions.TermData;
import nu.marginalia.index.positions.PositionsFileReader;
import nu.marginalia.index.query.EmptyEntrySource;
import nu.marginalia.index.query.EntrySource;
import nu.marginalia.index.query.ReverseIndexRejectFilter;
import nu.marginalia.index.query.ReverseIndexRetainFilter;
import nu.marginalia.index.query.filter.QueryFilterLetThrough;
import nu.marginalia.index.query.filter.QueryFilterNoPass;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.Executors;
public class FullReverseIndexReader {
private final LongArray words;
private final LongArray documents;
private final long wordsDataOffset;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final BTreeReader wordsBTreeReader;
private final String name;
private final PositionsFileReader positionsFileReader;
public FullReverseIndexReader(String name,
Path words,
Path documents,
PositionsFileReader positionsFileReader) throws IOException {
this.name = name;
this.positionsFileReader = positionsFileReader;
if (!Files.exists(words) || !Files.exists(documents)) {
this.words = null;
this.documents = null;
this.wordsBTreeReader = null;
this.wordsDataOffset = -1;
return;
}
logger.info("Switching reverse index");
this.words = LongArrayFactory.mmapForReadingShared(words);
this.documents = LongArrayFactory.mmapForReadingShared(documents);
wordsBTreeReader = new BTreeReader(this.words, ReverseIndexParameters.wordsBTreeContext, 0);
wordsDataOffset = wordsBTreeReader.getHeader().dataOffsetLongs();
if (getClass().desiredAssertionStatus()) {
if (Boolean.getBoolean("index-self-test")) {
Executors.newSingleThreadExecutor().execute(this::selfTest);
}
}
}
private void selfTest() {
logger.info("Running self test program");
long wordsDataSize = wordsBTreeReader.getHeader().numEntries() * 2L;
var wordsDataRange = words.range(wordsDataOffset, wordsDataOffset + wordsDataSize);
// ReverseIndexSelfTest.runSelfTest1(wordsDataRange, wordsDataSize);
// ReverseIndexSelfTest.runSelfTest2(wordsDataRange, documents);
// ReverseIndexSelfTest.runSelfTest3(wordsDataRange, wordsBTreeReader);
// ReverseIndexSelfTest.runSelfTest4(wordsDataRange, documents);
ReverseIndexSelfTest.runSelfTest5(wordsDataRange, wordsBTreeReader);
ReverseIndexSelfTest.runSelfTest6(wordsDataRange, documents);
}
/** Calculate the offset of the word in the documents.
* If the return-value is negative, the term does not exist
* in the index.
*/
long wordOffset(long termId) {
long idx = wordsBTreeReader.findEntry(termId);
if (idx < 0)
return -1L;
return words.get(wordsDataOffset + idx + 1);
}
public EntrySource documents(long termId) {
if (null == words) {
logger.warn("Reverse index is not ready, dropping query");
return new EmptyEntrySource();
}
long offset = wordOffset(termId);
if (offset < 0) // No documents
return new EmptyEntrySource();
return new FullIndexEntrySource(name, createReaderNew(offset), 2, termId);
}
/** Create a filter step requiring the specified termId to exist in the documents */
public QueryFilterStepIf also(long termId) {
long offset = wordOffset(termId);
if (offset < 0) // No documents
return new QueryFilterNoPass();
return new ReverseIndexRetainFilter(createReaderNew(offset), name, termId);
}
/** Create a filter step requiring the specified termId to be absent from the documents */
public QueryFilterStepIf not(long termId) {
long offset = wordOffset(termId);
if (offset < 0) // No documents
return new QueryFilterLetThrough();
return new ReverseIndexRejectFilter(createReaderNew(offset));
}
/** Return the number of documents with the termId in the index */
public int numDocuments(long termId) {
long offset = wordOffset(termId);
if (offset < 0)
return 0;
return createReaderNew(offset).numEntries();
}
/** Create a BTreeReader for the document offset associated with a termId */
private BTreeReader createReaderNew(long offset) {
return new BTreeReader(
documents,
ReverseIndexParameters.fullDocsBTreeContext,
offset);
}
public TermData[] getTermData(Arena arena,
long termId,
long[] docIds)
{
var ret = new TermData[docIds.length];
long offset = wordOffset(termId);
if (offset < 0) {
// This is likely a bug in the code, but we can't throw an exception here
logger.debug("Missing offset for word {}", termId);
return ret;
}
var reader = createReaderNew(offset);
// Read the size and offset of the position data
var offsets = reader.queryData(docIds, 1);
for (int i = 0; i < docIds.length; i++) {
if (offsets[i] == 0)
continue;
ret[i] = positionsFileReader.getTermData(arena, offsets[i]);
}
return ret;
}
public void close() {
if (documents != null)
documents.close();
if (words != null)
words.close();
if (positionsFileReader != null) {
try {
positionsFileReader.close();
} catch (IOException e) {
logger.error("Failed to close positions file reader", e);
}
}
}
}

View File

@@ -1,33 +0,0 @@
package nu.marginalia.index;
import java.nio.file.Path;
public class ReverseIndexFullFileNames {
public static Path resolve(Path basePath, FileIdentifier identifier, FileVersion version) {
return switch (identifier) {
case WORDS -> switch (version) {
case NEXT -> basePath.resolve("rev-words.dat.next");
case CURRENT -> basePath.resolve("rev-words.dat");
};
case DOCS -> switch (version) {
case NEXT -> basePath.resolve("rev-docs.dat.next");
case CURRENT -> basePath.resolve("rev-docs.dat");
};
case POSITIONS -> switch (version) {
case NEXT -> basePath.resolve("rev-positions.dat.next");
case CURRENT -> basePath.resolve("rev-positions.dat");
};
};
}
public enum FileVersion {
CURRENT,
NEXT,
}
public enum FileIdentifier {
WORDS,
DOCS,
POSITIONS,
}
}

View File

@@ -1,11 +0,0 @@
package nu.marginalia.index;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
public class ReverseIndexParameters
{
public static final BTreeContext prioDocsBTreeContext = new BTreeContext(5, 1, BTreeBlockSize.BS_2048);
public static final BTreeContext fullDocsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_2048);
public static final BTreeContext wordsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_2048);
}

View File

@@ -1,28 +0,0 @@
package nu.marginalia.index;
import java.nio.file.Path;
public class ReverseIndexPrioFileNames {
public static Path resolve(Path basePath, FileIdentifier identifier, FileVersion version) {
return switch (identifier) {
case WORDS -> switch (version) {
case NEXT -> basePath.resolve("rev-prio-words.dat.next");
case CURRENT -> basePath.resolve("rev-prio-words.dat");
};
case DOCS -> switch (version) {
case NEXT -> basePath.resolve("rev-prio-docs.dat.next");
case CURRENT -> basePath.resolve("rev-prio-docs.dat");
};
};
}
public enum FileVersion {
CURRENT,
NEXT
}
public enum FileIdentifier {
WORDS,
DOCS,
}
}

View File

@@ -1,109 +0,0 @@
package nu.marginalia.index;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import nu.marginalia.array.LongArray;
import nu.marginalia.btree.BTreeReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Random;
public class ReverseIndexSelfTest {
private static final Logger logger = LoggerFactory.getLogger(ReverseIndexSelfTest.class);
public static void runSelfTest1(LongArray wordsDataRange, long wordsDataSize) {
logger.info("Starting test 1");
if (!wordsDataRange.isSortedN(2, 0, wordsDataSize))
logger.error("Failed test 1: Words data is not sorted");
else
logger.info("Passed test 1");
}
public static void runSelfTest2(LongArray wordsDataRange, LongArray documents) {
logger.info("Starting test 2");
for (long i = 1; i < wordsDataRange.size(); i+=2) {
var docsBTreeReader = new BTreeReader(documents, ReverseIndexParameters.fullDocsBTreeContext, wordsDataRange.get(i));
var header = docsBTreeReader.getHeader();
var docRange = documents.range(header.dataOffsetLongs(), header.dataOffsetLongs() + header.numEntries() * 2L);
if (!docRange.isSortedN(2, 0, header.numEntries() * 2L)) {
logger.error("Failed test 2: numEntries={}, offset={}", header.numEntries(), header.dataOffsetLongs());
return;
}
}
logger.info("Passed test 2");
}
public static void runSelfTest3(LongArray wordsDataRange, BTreeReader reader) {
logger.info("Starting test 3");
for (long i = 0; i < wordsDataRange.size(); i+=2) {
if (reader.findEntry(wordsDataRange.get(i)) < 0) {
logger.error("Failed Test 3");
return;
}
}
logger.info("Passed test 3");
}
public static void runSelfTest4(LongArray wordsDataRange, LongArray documents) {
logger.info("Starting test 4");
for (long i = 1; i < wordsDataRange.size(); i+=2) {
var docsBTreeReader = new BTreeReader(documents, ReverseIndexParameters.fullDocsBTreeContext, wordsDataRange.get(i));
var header = docsBTreeReader.getHeader();
var docRange = documents.range(header.dataOffsetLongs(), header.dataOffsetLongs() + header.numEntries() * 2L);
for (int j = 0; j < docRange.size(); j+=2) {
if (docsBTreeReader.findEntry(docRange.get(j)) < 0) {
logger.info("Failed test 4");
return;
}
}
}
logger.info("Passed test 4");
}
public static void runSelfTest5(LongArray wordsDataRange, BTreeReader wordsBTreeReader) {
logger.info("Starting test 5");
LongOpenHashSet words = new LongOpenHashSet((int)wordsDataRange.size()/2);
for (int i = 0; i < wordsDataRange.size(); i+=2) {
words.add(wordsDataRange.get(i));
}
var random = new Random();
for (int i = 0; i < 100_000_000; i++) {
long v;
do {
v = random.nextLong();
} while (words.contains(v));
if (wordsBTreeReader.findEntry(v) >= 0) {
logger.error("Failed test 5 @ W{}", v);
return;
}
}
logger.info("Passed test 5");
}
public static void runSelfTest6(LongArray wordsDataRange, LongArray documents) {
logger.info("Starting test 6");
for (long i = 1; i < wordsDataRange.size(); i+=2) {
var docsBTreeReader = new BTreeReader(documents, ReverseIndexParameters.fullDocsBTreeContext, wordsDataRange.get(i));
var header = docsBTreeReader.getHeader();
var docRange = documents.range(header.dataOffsetLongs(), header.dataOffsetLongs() + header.numEntries() * 2L);
Long prev = null;
for (int j = 0; j < docRange.size(); j+=2) {
if (prev == null) {
prev = docRange.get(j);
continue;
}
long thisVal = prev + 1;
long nextVal = docRange.get(j);
while (thisVal < nextVal) {
if (docsBTreeReader.findEntry(thisVal) >= 0) {
logger.info("Failed test 6 @ W{}:D{}", wordsDataRange.get(i-1), thisVal);
return;
}
thisVal++;
}
}
}
logger.info("Passed test 6");
}
}

View File

@@ -1,76 +0,0 @@
package nu.marginalia.index.construction;
import nu.marginalia.index.positions.PositionCodec;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
/** A class for constructing a positions file. This class is thread-safe.
*
* <p></p>
*
* The positions data is concatenated in the file, with each term's metadata
* followed by its positions. The metadata is a single byte, and the positions
* are encoded using the Elias Gamma code, with zero padded bits at the end to
* get octet alignment.
*
* <p></p>
*
* It is the responsibility of the caller to keep track of the byte offset of
* each posting in the file.
*/
public class PositionsFileConstructor implements AutoCloseable {
private final ByteBuffer workBuffer = ByteBuffer.allocate(65536);
private final Path file;
private final FileChannel channel;
private long offset;
public PositionsFileConstructor(Path file) throws IOException {
this.file = file;
channel = FileChannel.open(file, StandardOpenOption.CREATE, StandardOpenOption.WRITE);
}
/** Add a term to the positions file
* @param termMeta the term metadata
* @param positionsBuffer the positions of the term
* @return the offset of the term in the file, with the size of the data in the highest byte
*/
public long add(byte termMeta, ByteBuffer positionsBuffer) throws IOException {
synchronized (file) {
int size = 1 + positionsBuffer.remaining();
if (workBuffer.remaining() < size) {
workBuffer.flip();
channel.write(workBuffer);
workBuffer.clear();
}
workBuffer.put(termMeta);
workBuffer.put(positionsBuffer);
long ret = PositionCodec.encode(size, offset);
offset += size;
return ret;
}
}
public void close() throws IOException {
if (workBuffer.hasRemaining()) {
workBuffer.flip();
while (workBuffer.hasRemaining())
channel.write(workBuffer);
}
channel.force(false);
channel.close();
}
}

View File

@@ -1,46 +0,0 @@
package nu.marginalia.index.construction.full;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.LongArrayTransformations;
import nu.marginalia.btree.BTreeWriter;
import nu.marginalia.btree.model.BTreeContext;
import java.io.IOException;
/** Constructs the BTrees in a reverse index */
public class FullIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
private final BTreeWriter writer;
private final int entrySize;
private final LongArray documentsArray;
long start = 0;
long writeOffset = 0;
public FullIndexBTreeTransformer(LongArray urlsFileMap,
int entrySize,
BTreeContext bTreeContext,
LongArray documentsArray) {
this.documentsArray = documentsArray;
this.writer = new BTreeWriter(urlsFileMap, bTreeContext);
this.entrySize = entrySize;
}
@Override
public long transform(long pos, long end) throws IOException {
final int size = (int) ((end - start) / entrySize);
if (size == 0) {
return -1;
}
final long offsetForBlock = writeOffset;
writeOffset += writer.write(writeOffset, size,
mapRegion -> mapRegion.transferFrom(documentsArray, start, 0, end - start)
);
start = end;
return offsetForBlock;
}
}

View File

@@ -1,43 +0,0 @@
package nu.marginalia.index.positions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
public class PositionsFileReader implements AutoCloseable {
private final FileChannel positions;
private static final Logger logger = LoggerFactory.getLogger(PositionsFileReader.class);
public PositionsFileReader(Path positionsFile) throws IOException {
this.positions = FileChannel.open(positionsFile, StandardOpenOption.READ);
}
/** Get the positions for a term in the index, as pointed out by the encoded offset;
* intermediate buffers are allocated from the provided arena allocator. */
public TermData getTermData(Arena arena, long sizeEncodedOffset) {
int length = PositionCodec.decodeSize(sizeEncodedOffset);
long offset = PositionCodec.decodeOffset(sizeEncodedOffset);
var segment = arena.allocate(length);
var buffer = segment.asByteBuffer();
try {
positions.read(buffer, offset);
} catch (IOException e) {
throw new RuntimeException(e);
}
return new TermData(buffer);
}
@Override
public void close() throws IOException {
positions.close();
}
}

View File

@@ -1,28 +0,0 @@
package nu.marginalia.index.query;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
public record ReverseIndexRejectFilter(BTreeReader range) implements QueryFilterStepIf {
@Override
public void apply(LongQueryBuffer buffer) {
range.rejectEntries(buffer);
buffer.finalizeFiltering();
}
public boolean test(long id) {
return range.findEntry(id) < 0;
}
@Override
public double cost() {
return range.numEntries();
}
@Override
public String describe() {
return "ReverseIndexRejectFilter[]";
}
}

View File

@@ -1,28 +0,0 @@
package nu.marginalia.index.query;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
public record ReverseIndexRetainFilter(BTreeReader range, String name, long wordId) implements QueryFilterStepIf {
@Override
public void apply(LongQueryBuffer buffer) {
range.retainEntries(buffer);
buffer.finalizeFiltering();
}
public boolean test(long id) {
return range.findEntry(id) >= 0;
}
@Override
public double cost() {
return range.numEntries();
}
@Override
public String describe() {
return "Retain:" + name + "/" + wordId;
}
}

View File

@@ -1,56 +0,0 @@
# Reverse Index
The reverse index contains a mapping from word to document id.
There are two tiers of this index.
* A priority index which only indexes terms that are flagged with priority flags<sup>1</sup>.
* A full index that indexes all terms.
The full index also provides access to term-level metadata, while the priority index is
a binary index that only offers information about which documents have a specific word.
The priority index is also compressed, while the full index at this point is not.
[1] See WordFlags in [common/model](../../common/model/) and
KeywordMetadata in [converting-process/ft-keyword-extraction](../../processes/converting-process/ft-keyword-extraction).
## Construction
The reverse index is constructed by first building a series of preindexes.
Preindexes consist of a Segment and a Documents object. The segment contains
information about which word identifiers are present and how many, and the
documents contain information about in which documents the words can be found.
![Memory layout illustrations](./preindex.svg)
These would typically not fit in RAM, so the index journal is paged
and the preindexes are constructed small enough to fit in memory, and
then merged. Merging sorted arrays is a very fast operation that does
not require additional RAM.
![Illustration of successively merged preindex files](./merging.svg)
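To make the merge step concrete, here is a minimal sketch of a two-way merge of sorted arrays (the production code performs the same operation while streaming between disk-backed arrays rather than allocating the output in memory):

```java
// Illustrative sketch: merging two sorted preindex arrays into one sorted output.
static long[] merge(long[] a, long[] b) {
    long[] out = new long[a.length + b.length];
    int i = 0, j = 0, k = 0;
    while (i < a.length && j < b.length)
        out[k++] = (a[i] <= b[j]) ? a[i++] : b[j++];
    while (i < a.length) out[k++] = a[i++];
    while (j < b.length) out[k++] = b[j++];
    return out;
}
```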
Once merged into one large preindex, indexes are added to the preindex data
to form a finalized reverse index.
![Illustration of the data layout of the finalized index](index.svg)
## Central Classes
Full index:
* [FullPreindex](java/nu/marginalia/index/construction/full/FullPreindex.java) intermediate reverse index state.
* [FullIndexConstructor](java/nu/marginalia/index/construction/full/FullIndexConstructor.java) constructs the index.
* [FullReverseIndexReader](java/nu/marginalia/index/FullReverseIndexReader.java) interrogates the index.
Prio index:
* [PrioPreindex](java/nu/marginalia/index/construction/prio/PrioPreindex.java) intermediate reverse index state.
* [PrioIndexConstructor](java/nu/marginalia/index/construction/prio/PrioIndexConstructor.java) constructs the index.
* [PrioReverseIndexReader](java/nu/marginalia/index/PrioReverseIndexReader.java) interrogates the index.
## See Also
* [index-journal](../index-journal)
* [index-forward](../index-forward)
* [libraries/btree](../../libraries/btree)
* [libraries/array](../../libraries/array)

View File

@@ -1,63 +0,0 @@
package nu.marginalia.index;
import it.unimi.dsi.fastutil.ints.IntList;
import nu.marginalia.index.construction.PositionsFileConstructor;
import nu.marginalia.index.positions.PositionsFileReader;
import nu.marginalia.index.positions.TermData;
import nu.marginalia.sequence.VarintCodedSequence;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.assertEquals;
class PositionsFileReaderTest {
Path file;
@BeforeEach
void setUp() throws IOException {
file = Files.createTempFile("positions", "dat");
}
@AfterEach
void tearDown() throws IOException {
Files.delete(file);
}
@Test
void getTermData() throws IOException {
ByteBuffer workArea = ByteBuffer.allocate(8192);
long key1, key2, key3;
try (PositionsFileConstructor constructor = new PositionsFileConstructor(file)) {
key1 = constructor.add((byte) 43, VarintCodedSequence.generate(1, 2, 3).buffer());
key2 = constructor.add((byte) 51, VarintCodedSequence.generate(2, 3, 5, 1000, 5000, 20241).buffer());
key3 = constructor.add((byte) 61, VarintCodedSequence.generate(3, 5, 7).buffer());
}
System.out.println("key1: " + Long.toHexString(key1));
System.out.println("key2: " + Long.toHexString(key2));
System.out.println("key3: " + Long.toHexString(key3));
try (Arena arena = Arena.ofConfined();
PositionsFileReader reader = new PositionsFileReader(file))
{
TermData data1 = reader.getTermData(arena, key1);
assertEquals(43, data1.flags());
assertEquals(IntList.of( 1, 2, 3), data1.positions().values());
TermData data2 = reader.getTermData(arena, key2);
assertEquals(51, data2.flags());
assertEquals(IntList.of(2, 3, 5, 1000, 5000, 20241), data2.positions().values());
TermData data3 = reader.getTermData(arena, key3);
assertEquals(61, data3.flags());
assertEquals(IntList.of(3, 5, 7), data3.positions().values());
}
}
}


@@ -1,49 +0,0 @@
package nu.marginalia.index;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.btree.BTreeReader;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
public class ReverseIndexDebugTest {
@Test
@Disabled // this is a debugging utility
public void debug() throws IOException {
long problemWord = -7909917549851025932L;
long problemDoc = 9079256848846028801L;
var words = LongArrayFactory.mmapForReadingConfined(Path.of("/home/vlofgren/Code/MarginaliaSearch/run/node-1/index/ir/rev-words.dat"));
var documents = LongArrayFactory.mmapForReadingConfined(Path.of("/home/vlofgren/Code/MarginaliaSearch/run/node-1/index/ir/rev-docs.dat"));
var wordsBTreeReader = new BTreeReader(words, ReverseIndexParameters.wordsBTreeContext, 0);
var wordsDataOffset = wordsBTreeReader.getHeader().dataOffsetLongs();
long wordOffset = wordsBTreeReader.findEntry(problemWord);
assertTrue(wordOffset >= 0);
var docsReader = new BTreeReader(documents, ReverseIndexParameters.prioDocsBTreeContext, wordOffset);
// We find problemDoc even though it doesn't exist in the document range
long docOffset = docsReader.findEntry(problemDoc);
assertTrue(docOffset < 0);
// We know it doesn't exist because when we check, we can't find it,
// either by iterating...
var dataRange = docsReader.data();
System.out.println(dataRange.size());
for (int i = 0; i < dataRange.size(); i+=2) {
assertNotEquals(problemDoc, dataRange.get(i));
}
// or by binary searching
assertTrue(dataRange.binarySearchN(2, problemDoc, 0, dataRange.size()) < 0);
}
}


@@ -1,149 +0,0 @@
package nu.marginalia.index.construction.full;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.btree.model.BTreeHeader;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.PositionsFileConstructor;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import static nu.marginalia.index.construction.full.TestJournalFactory.EntryDataWithWordMeta;
import static nu.marginalia.index.construction.full.TestJournalFactory.wm;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
class FullPreindexFinalizeTest {
TestJournalFactory journalFactory;
Path positionsFile;
Path countsFile;
Path wordsIdFile;
Path docsFile;
Path tempDir;
@BeforeEach
public void setUp() throws IOException {
journalFactory = new TestJournalFactory();
positionsFile = Files.createTempFile("positions", ".dat");
countsFile = Files.createTempFile("counts", ".dat");
wordsIdFile = Files.createTempFile("words", ".dat");
docsFile = Files.createTempFile("docs", ".dat");
tempDir = Files.createTempDirectory("sort");
}
@AfterEach
public void tearDown() throws IOException {
journalFactory.clear();
Files.deleteIfExists(countsFile);
Files.deleteIfExists(wordsIdFile);
List<Path> contents = new ArrayList<>();
Files.list(tempDir).forEach(contents::add);
for (var tempFile : contents) {
Files.delete(tempFile);
}
Files.delete(tempDir);
}
MurmurHash3_128 hash = new MurmurHash3_128();
long termId(String keyword) {
return hash.hashKeyword(keyword);
}
@Test
public void testFinalizeSimple() throws IOException {
var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51)));
var preindex = FullPreindex.constructPreindex(reader,
new PositionsFileConstructor(positionsFile),
DocIdRewriter.identity(), tempDir);
preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat"));
preindex.delete();
Path wordsFile = tempDir.resolve("words.dat");
Path docsFile = tempDir.resolve("docs.dat");
assertTrue(Files.exists(wordsFile));
assertTrue(Files.exists(docsFile));
System.out.println(Files.size(wordsFile));
System.out.println(Files.size(docsFile));
var docsArray = LongArrayFactory.mmapForReadingConfined(docsFile);
var wordsArray = LongArrayFactory.mmapForReadingConfined(wordsFile);
var docsHeader = new BTreeHeader(docsArray, 0);
var wordsHeader = new BTreeHeader(wordsArray, 0);
assertEquals(1, docsHeader.numEntries());
assertEquals(1, wordsHeader.numEntries());
assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
assertEquals(termId("50"), wordsArray.get(wordsHeader.dataOffsetLongs()));
}
@Test
public void testFinalizeSimple2x2() throws IOException {
var reader = journalFactory.createReader(
new EntryDataWithWordMeta(100, 101, wm(50, 51)),
new EntryDataWithWordMeta(101, 101, wm(51, 52))
);
var preindex = FullPreindex.constructPreindex(reader,
new PositionsFileConstructor(positionsFile),
DocIdRewriter.identity(), tempDir);
preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat"));
preindex.delete();
Path wordsFile = tempDir.resolve("words.dat");
Path docsFile = tempDir.resolve("docs.dat");
assertTrue(Files.exists(wordsFile));
assertTrue(Files.exists(docsFile));
System.out.println(Files.size(wordsFile));
System.out.println(Files.size(docsFile));
var docsArray = LongArrayFactory.mmapForReadingConfined(docsFile);
var wordsArray = LongArrayFactory.mmapForReadingConfined(wordsFile);
var wordsHeader = new BTreeHeader(wordsArray, 0);
System.out.println(wordsHeader);
assertEquals(2, wordsHeader.numEntries());
long offset1 = wordsArray.get(wordsHeader.dataOffsetLongs() + 1);
long offset2 = wordsArray.get(wordsHeader.dataOffsetLongs() + 3);
assertEquals(termId("50"), wordsArray.get(wordsHeader.dataOffsetLongs()));
assertEquals(termId("50"), wordsArray.get(wordsHeader.dataOffsetLongs()));
BTreeHeader docsHeader;
docsHeader = new BTreeHeader(docsArray, offset1);
System.out.println(docsHeader);
assertEquals(1, docsHeader.numEntries());
assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
docsHeader = new BTreeHeader(docsArray, offset2);
System.out.println(docsHeader);
assertEquals(1, docsHeader.numEntries());
assertEquals(101, docsArray.get(docsHeader.dataOffsetLongs() + 0));
}
}


(image file diff: 21 KiB before and after)


@@ -1,22 +1,24 @@
package nu.marginalia.index.index;
package nu.marginalia.index;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
import nu.marginalia.index.FullReverseIndexReader;
import nu.marginalia.index.PrioReverseIndexReader;
import nu.marginalia.api.searchquery.model.query.SpecificationLimitType;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.index.forward.ForwardIndexReader;
import nu.marginalia.index.forward.spans.DocumentSpans;
import nu.marginalia.index.model.CombinedDocIdList;
import nu.marginalia.index.model.QueryParams;
import nu.marginalia.index.model.SearchTerms;
import nu.marginalia.index.query.IndexQuery;
import nu.marginalia.index.query.IndexQueryBuilder;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.index.results.model.ids.TermMetadataList;
import nu.marginalia.index.model.SearchContext;
import nu.marginalia.index.model.TermMetadataList;
import nu.marginalia.index.reverse.FullReverseIndexReader;
import nu.marginalia.index.reverse.IndexLanguageContext;
import nu.marginalia.index.reverse.PrioReverseIndexReader;
import nu.marginalia.index.reverse.query.IndexQuery;
import nu.marginalia.index.reverse.query.IndexSearchBudget;
import nu.marginalia.index.reverse.query.filter.QueryFilterStepIf;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.DocumentMetadata;
import org.slf4j.Logger;
@@ -28,6 +30,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Predicate;
/** A reader for the combined forward and reverse indexes.
@@ -51,25 +54,33 @@ public class CombinedIndexReader {
this.reverseIndexPriorityReader = reverseIndexPriorityReader;
}
public IndexQueryBuilderImpl newQueryBuilder(IndexQuery query) {
return new IndexQueryBuilderImpl(reverseIndexFullReader, query);
public IndexLanguageContext createLanguageContext(String languageIsoCode) {
return new IndexLanguageContext(languageIsoCode,
reverseIndexFullReader.getWordLexicon(languageIsoCode),
reverseIndexPriorityReader.getWordLexicon(languageIsoCode)
);
}
public QueryFilterStepIf hasWordFull(long termId) {
return reverseIndexFullReader.also(termId);
public IndexQueryBuilder newQueryBuilder(IndexLanguageContext context, IndexQuery query) {
return new IndexQueryBuilder(reverseIndexFullReader, context, query);
}
public QueryFilterStepIf hasWordFull(IndexLanguageContext languageContext, long termId, IndexSearchBudget budget) {
return reverseIndexFullReader.also(languageContext, termId, budget);
}
/** Creates a query builder for terms in the priority index */
public IndexQueryBuilder findPriorityWord(long wordId) {
return newQueryBuilder(new IndexQuery(reverseIndexPriorityReader.documents(wordId)))
.withSourceTerms(wordId);
public IndexQueryBuilder findPriorityWord(IndexLanguageContext languageContext, long wordId) {
IndexQuery query = new IndexQuery(reverseIndexPriorityReader.documents(languageContext, wordId), true);
return newQueryBuilder(languageContext, query).withSourceTerms(wordId);
}
/** Creates a query builder for terms in the full index */
public IndexQueryBuilder findFullWord(long wordId) {
return newQueryBuilder(
new IndexQuery(reverseIndexFullReader.documents(wordId)))
.withSourceTerms(wordId);
public IndexQueryBuilder findFullWord(IndexLanguageContext languageContext, long wordId) {
IndexQuery query = new IndexQuery(reverseIndexFullReader.documents(languageContext, wordId), false);
return newQueryBuilder(languageContext, query).withSourceTerms(wordId);
}
/** Creates a parameter matching filter step for the provided parameters */
@@ -78,21 +89,32 @@ public class CombinedIndexReader {
}
/** Returns the number of occurrences of the word in the full index */
public int numHits(long word) {
return reverseIndexFullReader.numDocuments(word);
public int numHits(IndexLanguageContext languageContext, long word) {
return reverseIndexFullReader.numDocuments(languageContext, word);
}
public List<IndexQuery> createQueries(SearchTerms terms, QueryParams params) {
/** Reset caches and buffers */
public void reset() {
reverseIndexFullReader.reset();
}
public List<IndexQuery> createQueries(SearchContext context) {
if (!isLoaded()) {
logger.warn("Index reader not ready");
return Collections.emptyList();
}
List<IndexQueryBuilder> queryHeads = new ArrayList<>(10);
final IndexLanguageContext languageContext = context.languageContext;
final long[] termPriority = context.sortedDistinctIncludes((a,b) -> {
return Long.compare(
numHits(languageContext, a),
numHits(languageContext, b)
);
});
final long[] termPriority = terms.sortedDistinctIncludes(this::compareKeywords);
List<LongSet> paths = CompiledQueryAggregates.queriesAggregate(terms.compiledQuery());
List<IndexQueryBuilder> queryHeads = new ArrayList<>(10);
List<LongSet> paths = CompiledQueryAggregates.queriesAggregate(context.compiledQueryIds);
// Remove any paths that do not contain all prioritized terms, as this means
// the term is missing from the index and can never be found
@@ -102,37 +124,27 @@ public class CombinedIndexReader {
LongList elements = new LongArrayList(path);
elements.sort((a, b) -> {
for (int i = 0; i < termPriority.length; i++) {
if (termPriority[i] == a)
for (long l : termPriority) {
if (l == a)
return -1;
if (termPriority[i] == b)
if (l == b)
return 1;
}
return 0;
});
if (!SearchTerms.stopWords.contains(elements.getLong(0))) {
var head = findFullWord(elements.getLong(0));
var head = findFullWord(languageContext, elements.getLong(0));
for (int i = 1; i < elements.size(); i++) {
long termId = elements.getLong(i);
// if a stop word is present in the query, skip the step of requiring it to be in the document,
// we'll assume it's there and save IO
if (SearchTerms.stopWords.contains(termId)) {
continue;
}
head.addInclusionFilter(hasWordFull(termId));
}
queryHeads.add(head);
for (int i = 1; i < elements.size(); i++) {
head.addInclusionFilter(hasWordFull(languageContext, elements.getLong(i), context.budget));
}
queryHeads.add(head);
// If there are few paths, we can afford to check the priority index as well
if (paths.size() < 4) {
var prioHead = findPriorityWord(elements.getLong(0));
var prioHead = findPriorityWord(languageContext, elements.getLong(0));
for (int i = 1; i < elements.size(); i++) {
prioHead.addInclusionFilter(hasWordFull(elements.getLong(i)));
prioHead.addInclusionFilter(hasWordFull(languageContext, elements.getLong(i), context.budget));
}
queryHeads.add(prioHead);
}
@@ -142,17 +154,17 @@ public class CombinedIndexReader {
for (var query : queryHeads) {
// Advice terms are a special case, mandatory but not ranked, and exempt from re-writing
for (long term : terms.advice()) {
query = query.also(term);
for (long term : context.termIdsAdvice) {
query = query.also(term, context.budget);
}
for (long term : terms.excludes()) {
query = query.not(term);
for (long term : context.termIdsExcludes) {
query = query.not(term, context.budget);
}
// Run these filter steps last, as they'll worst-case cause as many page faults as there are
// items in the buffer
query.addInclusionFilter(filterForParams(params));
query.addInclusionFilter(filterForParams(context.queryParams));
}
return queryHeads
@@ -166,23 +178,20 @@ public class CombinedIndexReader {
return permittedTerms::containsAll;
}
private int compareKeywords(long a, long b) {
return Long.compare(
numHits(a),
numHits(b)
);
}
/** Returns the number of occurrences of the word in the priority index */
public int numHitsPrio(long word) {
return reverseIndexPriorityReader.numDocuments(word);
public int numHitsPrio(IndexLanguageContext languageContext, long word) {
return reverseIndexPriorityReader.numDocuments(languageContext, word);
}
/** Retrieves the term metadata for the specified word for the provided documents */
public TermMetadataList getTermMetadata(Arena arena,
long wordId,
CombinedDocIdList docIds)
public TermMetadataList[] getTermMetadata(Arena arena,
IndexLanguageContext languageContext,
IndexSearchBudget budget,
long[] wordIds,
CombinedDocIdList docIds)
throws TimeoutException
{
return new TermMetadataList(reverseIndexFullReader.getTermData(arena, wordId, docIds.array()));
return reverseIndexFullReader.getTermData(arena, languageContext, budget, wordIds, docIds);
}
/** Retrieves the document metadata for the specified document */
@@ -205,14 +214,14 @@ public class CombinedIndexReader {
return forwardIndexReader.getDocumentSize(docId);
}
/** Retrieves the document spans for the specified document */
public DocumentSpans getDocumentSpans(Arena arena, long docId) {
return forwardIndexReader.getDocumentSpans(arena, docId);
/** Retrieves the document spans for the specified documents */
public DocumentSpans[] getDocumentSpans(Arena arena, IndexSearchBudget budget, CombinedDocIdList docIds) throws TimeoutException {
return forwardIndexReader.getDocumentSpans(arena, budget, docIds);
}
/** Close the indexes (this is not done immediately)
* */
public void close() throws InterruptedException {
public void close() {
/* Delay the invocation of close method to allow for a clean shutdown of the service.
*
* This is especially important when using Unsafe-based LongArrays, since we have
@@ -227,7 +236,7 @@ public class CombinedIndexReader {
}
private void delayedCall(Runnable call, Duration delay) throws InterruptedException {
private void delayedCall(Runnable call, Duration delay) {
Thread.ofPlatform().start(() -> {
try {
TimeUnit.SECONDS.sleep(delay.toSeconds());
@@ -248,25 +257,47 @@ public class CombinedIndexReader {
class ParamMatchingQueryFilter implements QueryFilterStepIf {
private final QueryParams params;
private final ForwardIndexReader forwardIndexReader;
private final boolean imposesMetaConstraint;
public ParamMatchingQueryFilter(QueryParams params,
ForwardIndexReader forwardIndexReader)
{
this.params = params;
this.forwardIndexReader = forwardIndexReader;
this.imposesMetaConstraint = params.imposesDomainMetadataConstraint();
}
@Override
public void apply(LongQueryBuffer buffer) {
if (!imposesMetaConstraint && !params.searchSet().imposesConstraint()) {
return;
}
while (buffer.hasMore()) {
if (test(buffer.currentValue())) {
buffer.retainAndAdvance();
}
else {
buffer.rejectAndAdvance();
}
}
buffer.finalizeFiltering();
}
public boolean test(long combinedId) {
long docId = UrlIdCodec.removeRank(combinedId);
int domainId = UrlIdCodec.getDomainId(docId);
long meta = forwardIndexReader.getDocMeta(docId);
if (!validateDomain(domainId, meta)) {
if (!validateDomain(domainId)) {
return false;
}
if (!imposesMetaConstraint) {
return true;
}
long meta = forwardIndexReader.getDocMeta(docId);
if (!validateQuality(meta)) {
return false;
}
@@ -286,8 +317,8 @@ class ParamMatchingQueryFilter implements QueryFilterStepIf {
return true;
}
private boolean validateDomain(int domainId, long meta) {
return params.searchSet().contains(domainId, meta);
private boolean validateDomain(int domainId) {
return params.searchSet().contains(domainId);
}
private boolean validateQuality(long meta) {
@@ -338,4 +369,5 @@ class ParamMatchingQueryFilter implements QueryFilterStepIf {
public String describe() {
return getClass().getSimpleName();
}
}


@@ -3,16 +3,18 @@ package nu.marginalia.index;
import com.google.inject.Guice;
import com.google.inject.Inject;
import nu.marginalia.IndexLocations;
import nu.marginalia.index.construction.full.FullIndexConstructor;
import nu.marginalia.index.construction.prio.PrioIndexConstructor;
import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.config.IndexFileName;
import nu.marginalia.index.forward.construction.ForwardIndexConverter;
import nu.marginalia.index.journal.IndexJournal;
import nu.marginalia.index.reverse.construction.full.FullIndexConstructor;
import nu.marginalia.index.reverse.construction.prio.PrioIndexConstructor;
import nu.marginalia.index.searchset.DomainRankings;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.mqapi.index.CreateIndexRequest;
import nu.marginalia.mqapi.index.IndexName;
import nu.marginalia.process.ProcessConfiguration;
import nu.marginalia.process.ProcessConfigurationModule;
import nu.marginalia.process.ProcessMainClass;
@@ -25,11 +27,9 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import static nu.marginalia.mqapi.ProcessInboxNames.INDEX_CONSTRUCTOR_INBOX;
public class IndexConstructorMain extends ProcessMainClass {
private final FileStorageService fileStorageService;
private final ProcessHeartbeatImpl heartbeat;
@@ -37,7 +37,7 @@ public class IndexConstructorMain extends ProcessMainClass {
private static final Logger logger = LoggerFactory.getLogger(IndexConstructorMain.class);
public static void main(String[] args) throws Exception {
static void main(String[] args) throws Exception {
Instructions<CreateIndexRequest> instructions = null;
try {
new org.mariadb.jdbc.Driver();
@@ -74,20 +74,20 @@ public class IndexConstructorMain extends ProcessMainClass {
ProcessConfiguration processConfiguration,
DomainRankings domainRankings) {
super(messageQueueFactory, processConfiguration, GsonFactory.get(), INDEX_CONSTRUCTOR_INBOX);
super(messageQueueFactory, processConfiguration, GsonFactory.get(), ProcessInboxNames.INDEX_CONSTRUCTOR_INBOX);
this.fileStorageService = fileStorageService;
this.heartbeat = heartbeat;
this.domainRankings = domainRankings;
}
private void run(CreateIndexRequest instructions) throws SQLException, IOException {
private void run(CreateIndexRequest instructions) throws IOException {
heartbeat.start();
switch (instructions.indexName()) {
case FORWARD -> createForwardIndex();
case REVERSE_FULL -> createFullReverseIndex();
case REVERSE_PRIO -> createPrioReverseIndex();
case IndexName.FORWARD -> createForwardIndex();
case IndexName.REVERSE_FULL -> createFullReverseIndex();
case IndexName.REVERSE_PRIO -> createPrioReverseIndex();
}
heartbeat.shutDown();
@@ -95,50 +95,74 @@ public class IndexConstructorMain extends ProcessMainClass {
private void createFullReverseIndex() throws IOException {
Path outputFileDocs = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path outputFileWords = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path outputFilePositions = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.POSITIONS, ReverseIndexFullFileNames.FileVersion.NEXT);
Path outputFileDocs = findNextFile(new IndexFileName.FullDocs());
Path outputFilePositions = findNextFile(new IndexFileName.FullPositions());
Files.deleteIfExists(outputFileDocs);
Files.deleteIfExists(outputFilePositions);
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path tmpDir = workDir.resolve("tmp");
if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);
var constructor = new FullIndexConstructor(
outputFileDocs,
outputFileWords,
outputFilePositions,
this::addRankToIdEncoding,
tmpDir);
Set<String> languageIsoCodes = IndexJournal.findJournal(workDir)
.map(IndexJournal::languages)
.orElseGet(Set::of);
constructor.createReverseIndex(heartbeat, "createReverseIndexFull", workDir);
for (String languageIsoCode : languageIsoCodes) {
Path outputFileWords = findNextFile(new IndexFileName.FullWords(languageIsoCode));
FullIndexConstructor constructor = new FullIndexConstructor(
languageIsoCode,
outputFileDocs,
outputFileWords,
outputFilePositions,
this::addRankToIdEncoding,
tmpDir);
String processName = "createReverseIndexFull[%s]".formatted(languageIsoCode);
constructor.createReverseIndex(heartbeat, processName, workDir);
}
}
private void createPrioReverseIndex() throws IOException {
Path outputFileDocs = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.NEXT);
Path outputFileWords = ReverseIndexPrioFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.NEXT);
Path outputFileDocs = findNextFile(new IndexFileName.PrioDocs());
Files.deleteIfExists(outputFileDocs);
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path tmpDir = workDir.resolve("tmp");
var constructor = new PrioIndexConstructor(
outputFileDocs,
outputFileWords,
this::addRankToIdEncoding,
tmpDir);
Set<String> languageIsoCodes = IndexJournal.findJournal(workDir)
.map(IndexJournal::languages)
.orElseGet(Set::of);
constructor.createReverseIndex(heartbeat, "createReverseIndexPrio", workDir);
for (String languageIsoCode : languageIsoCodes) {
Path outputFileWords = findNextFile(new IndexFileName.PrioWords(languageIsoCode));
Files.deleteIfExists(outputFileWords);
PrioIndexConstructor constructor = new PrioIndexConstructor(
languageIsoCode,
outputFileDocs,
outputFileWords,
this::addRankToIdEncoding,
tmpDir);
String processName = "createReverseIndexPrio[%s]".formatted(languageIsoCode);
constructor.createReverseIndex(heartbeat, processName, workDir);
}
}
private void createForwardIndex() throws IOException {
Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
Path outputFileDocsId = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_ID, ForwardIndexFileNames.FileVersion.NEXT);
Path outputFileDocsData = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.DOC_DATA, ForwardIndexFileNames.FileVersion.NEXT);
Path outputFileSpansData = ForwardIndexFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ForwardIndexFileNames.FileIdentifier.SPANS_DATA, ForwardIndexFileNames.FileVersion.NEXT);
Path outputFileDocsId = findNextFile(new IndexFileName.ForwardDocIds());
Path outputFileDocsData = findNextFile(new IndexFileName.ForwardDocData());
Path outputFileSpansData = findNextFile(new IndexFileName.ForwardSpansData());
ForwardIndexConverter converter = new ForwardIndexConverter(heartbeat,
outputFileDocsId,
@@ -151,6 +175,10 @@ public class IndexConstructorMain extends ProcessMainClass {
converter.convert();
}
private Path findNextFile(IndexFileName fileName) {
return IndexFileName.resolve(IndexLocations.getCurrentIndex(fileStorageService), fileName, IndexFileName.Version.NEXT);
}
/** Append the domain's ranking to the high bits of a document ID
* to ensure they're sorted in order of rank within the index.
*/


@@ -4,7 +4,7 @@ import com.google.inject.AbstractModule;
import com.google.inject.Provides;
import com.google.inject.Singleton;
import nu.marginalia.IndexLocations;
import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.searchset.DomainRankings;
import nu.marginalia.storage.FileStorageService;
public class IndexConstructorModule extends AbstractModule {


@@ -3,27 +3,34 @@ package nu.marginalia.index;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.IndexLocations;
import nu.marginalia.index.forward.ForwardIndexFileNames;
import nu.marginalia.index.config.IndexFileName;
import nu.marginalia.index.forward.ForwardIndexReader;
import nu.marginalia.index.index.CombinedIndexReader;
import nu.marginalia.index.positions.PositionsFileReader;
import nu.marginalia.index.reverse.FullReverseIndexReader;
import nu.marginalia.index.reverse.PrioReverseIndexReader;
import nu.marginalia.index.reverse.WordLexicon;
import nu.marginalia.language.config.LanguageConfiguration;
import nu.marginalia.language.model.LanguageDefinition;
import nu.marginalia.storage.FileStorageService;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;
@Singleton
public class IndexFactory {
private final FileStorageService fileStorageService;
private final Path liveStorage;
private final LanguageConfiguration languageConfiguration;
@Inject
public IndexFactory(FileStorageService fileStorageService) {
public IndexFactory(FileStorageService fileStorageService, LanguageConfiguration languageConfiguration) {
this.fileStorageService = fileStorageService;
this.liveStorage = IndexLocations.getCurrentIndex(fileStorageService);
this.languageConfiguration = languageConfiguration;
}
public CombinedIndexReader getCombinedIndexReader() throws IOException {
@@ -39,47 +46,78 @@ public class IndexFactory {
}
public FullReverseIndexReader getReverseIndexReader() throws IOException {
Path docsFile = getCurrentPath(new IndexFileName.FullDocs());
Path positionsFile = getCurrentPath(new IndexFileName.FullPositions());
List<WordLexicon> wordLexicons = new ArrayList<>();
for (LanguageDefinition languageDefinition : languageConfiguration.languages()) {
String languageIsoCode = languageDefinition.isoCode();
Path wordsFile = getCurrentPath(new IndexFileName.FullWords(languageIsoCode));
if (Files.exists(wordsFile)) {
wordLexicons.add(new WordLexicon(languageIsoCode, wordsFile));
}
}
return new FullReverseIndexReader("full",
ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.CURRENT),
ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.CURRENT),
new PositionsFileReader(ReverseIndexFullFileNames.resolve(liveStorage, ReverseIndexFullFileNames.FileIdentifier.POSITIONS, ReverseIndexFullFileNames.FileVersion.CURRENT))
wordLexicons,
docsFile,
positionsFile
);
}
public PrioReverseIndexReader getReverseIndexPrioReader() throws IOException {
return new PrioReverseIndexReader("prio",
ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.WORDS, ReverseIndexPrioFileNames.FileVersion.CURRENT),
ReverseIndexPrioFileNames.resolve(liveStorage, ReverseIndexPrioFileNames.FileIdentifier.DOCS, ReverseIndexPrioFileNames.FileVersion.CURRENT)
);
List<WordLexicon> wordLexicons = new ArrayList<>();
for (LanguageDefinition languageDefinition : languageConfiguration.languages()) {
String languageIsoCode = languageDefinition.isoCode();
Path wordsFile = getCurrentPath(new IndexFileName.PrioWords(languageIsoCode));
if (Files.exists(wordsFile)) {
wordLexicons.add(new WordLexicon(languageIsoCode, wordsFile));
}
}
Path docsFile = getCurrentPath(new IndexFileName.PrioDocs());
return new PrioReverseIndexReader("prio", wordLexicons, docsFile);
}
public ForwardIndexReader getForwardIndexReader() throws IOException {
return new ForwardIndexReader(
ForwardIndexFileNames.resolve(liveStorage, ForwardIndexFileNames.FileIdentifier.DOC_ID, ForwardIndexFileNames.FileVersion.CURRENT),
ForwardIndexFileNames.resolve(liveStorage, ForwardIndexFileNames.FileIdentifier.DOC_DATA, ForwardIndexFileNames.FileVersion.CURRENT),
ForwardIndexFileNames.resolve(liveStorage, ForwardIndexFileNames.FileIdentifier.SPANS_DATA, ForwardIndexFileNames.FileVersion.CURRENT)
);
Path docIdsFile = getCurrentPath(new IndexFileName.ForwardDocIds());
Path docDataFile = getCurrentPath(new IndexFileName.ForwardDocData());
Path spansFile = getCurrentPath(new IndexFileName.ForwardSpansData());
return new ForwardIndexReader(docIdsFile, docDataFile, spansFile);
}
private Path getCurrentPath(IndexFileName fileName) {
return IndexFileName.resolve(liveStorage, fileName, IndexFileName.Version.CURRENT);
}
/** Switches the current index to the next index */
public void switchFiles() throws IOException {
for (var file : ReverseIndexFullFileNames.FileIdentifier.values()) {
for (var file : IndexFileName.forwardIndexFiles()) {
switchFile(
ReverseIndexFullFileNames.resolve(liveStorage, file, ReverseIndexFullFileNames.FileVersion.NEXT),
ReverseIndexFullFileNames.resolve(liveStorage, file, ReverseIndexFullFileNames.FileVersion.CURRENT)
IndexFileName.resolve(liveStorage, file, IndexFileName.Version.NEXT),
IndexFileName.resolve(liveStorage, file, IndexFileName.Version.CURRENT)
);
}
for (var file : ReverseIndexPrioFileNames.FileIdentifier.values()) {
for (IndexFileName file : IndexFileName.revPrioIndexFiles(languageConfiguration)) {
switchFile(
ReverseIndexPrioFileNames.resolve(liveStorage, file, ReverseIndexPrioFileNames.FileVersion.NEXT),
ReverseIndexPrioFileNames.resolve(liveStorage, file, ReverseIndexPrioFileNames.FileVersion.CURRENT)
IndexFileName.resolve(liveStorage, file, IndexFileName.Version.NEXT),
IndexFileName.resolve(liveStorage, file, IndexFileName.Version.CURRENT)
);
}
for (var file : ForwardIndexFileNames.FileIdentifier.values()) {
for (IndexFileName file : IndexFileName.revFullIndexFiles(languageConfiguration)) {
switchFile(
ForwardIndexFileNames.resolve(liveStorage, file, ForwardIndexFileNames.FileVersion.NEXT),
ForwardIndexFileNames.resolve(liveStorage, file, ForwardIndexFileNames.FileVersion.CURRENT)
IndexFileName.resolve(liveStorage, file, IndexFileName.Version.NEXT),
IndexFileName.resolve(liveStorage, file, IndexFileName.Version.CURRENT)
);
}
}


@@ -5,29 +5,19 @@ import com.google.inject.Singleton;
import io.grpc.Status;
import io.grpc.stub.StreamObserver;
import io.prometheus.client.Counter;
import io.prometheus.client.Gauge;
import io.prometheus.client.Histogram;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import nu.marginalia.api.searchquery.IndexApiGrpc;
import nu.marginalia.api.searchquery.RpcDecoratedResultItem;
import nu.marginalia.api.searchquery.RpcIndexQuery;
import nu.marginalia.api.searchquery.RpcResultRankingParameters;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.compiled.CqDataInt;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.results.ResultRankingContext;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.model.SearchParameters;
import nu.marginalia.index.model.SearchTerms;
import nu.marginalia.index.query.IndexQuery;
import nu.marginalia.index.query.IndexSearchBudget;
import nu.marginalia.index.model.SearchContext;
import nu.marginalia.index.results.IndexResultRankingService;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.index.searchset.SearchSet;
import nu.marginalia.index.searchset.SearchSetsService;
import nu.marginalia.index.searchset.SmallSearchSet;
import nu.marginalia.language.config.LanguageConfiguration;
import nu.marginalia.language.keywords.KeywordHasher;
import nu.marginalia.language.model.LanguageDefinition;
import nu.marginalia.service.module.ServiceConfiguration;
import nu.marginalia.service.server.DiscoverableService;
import org.slf4j.Logger;
@@ -35,14 +25,9 @@ import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.slf4j.MarkerFactory;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.Map;
@Singleton
public class IndexGrpcService
@@ -51,6 +36,7 @@ public class IndexGrpcService
{
private final Logger logger = LoggerFactory.getLogger(getClass());
private final Map<String, KeywordHasher> keywordHasherByLangIso;
// This marker is used to mark sensitive log messages that are related to queries
// so that they can be filtered out in the production logging configuration
@@ -61,11 +47,6 @@ public class IndexGrpcService
.help("Query timeout counter")
.labelNames("node", "api")
.register();
private static final Gauge wmsa_query_cost = Gauge.build()
.name("wmsa_index_query_cost")
.help("Computational cost of query")
.labelNames("node", "api")
.register();
private static final Histogram wmsa_query_time = Histogram.build()
.name("wmsa_index_query_time")
.linearBuckets(0.05, 0.05, 15)
@@ -73,55 +54,60 @@ public class IndexGrpcService
.help("Index-side query time")
.register();
private static final Gauge wmsa_index_query_exec_stall_time = Gauge.build()
.name("wmsa_index_query_exec_stall_time")
.help("Execution stall time")
.labelNames("node")
.register();
private static final Gauge wmsa_index_query_exec_block_time = Gauge.build()
.name("wmsa_index_query_exec_block_time")
.help("Execution stall time")
.labelNames("node")
.register();
private final StatefulIndex statefulIndex;
private final SearchSetsService searchSetsService;
private final IndexResultRankingService resultValuator;
private final IndexResultRankingService rankingService;
private final String nodeName;
private static final int indexValuationThreads = Integer.getInteger("index.valuationThreads", 16);
private final int nodeId;
@Inject
public IndexGrpcService(ServiceConfiguration serviceConfiguration,
LanguageConfiguration languageConfiguration,
StatefulIndex statefulIndex,
SearchSetsService searchSetsService,
IndexResultRankingService resultValuator)
IndexResultRankingService rankingService)
{
var nodeId = serviceConfiguration.node();
this.nodeId = serviceConfiguration.node();
this.nodeName = Integer.toString(nodeId);
this.statefulIndex = statefulIndex;
this.searchSetsService = searchSetsService;
this.resultValuator = resultValuator;
this.rankingService = rankingService;
this.keywordHasherByLangIso = new HashMap<>();
for (LanguageDefinition definition : languageConfiguration.languages()) {
keywordHasherByLangIso.put(definition.isoCode(), definition.keywordHasher());
}
}
// GRPC endpoint
public void query(RpcIndexQuery request,
StreamObserver<RpcDecoratedResultItem> responseObserver) {
try {
var params = new SearchParameters(request, getSearchSet(request));
long endTime = System.currentTimeMillis() + request.getQueryLimits().getTimeoutMs();
KeywordHasher hasher = findHasher(request);
List<RpcDecoratedResultItem> results = wmsa_query_time
.labels(nodeName, "GRPC")
.time(() -> {
// Perform the search
try {
return executeSearch(params);
if (!statefulIndex.isLoaded()) {
// Short-circuit if the index is not loaded, as we trivially know that there can be no results
return List.of();
}
CombinedIndexReader indexReader = statefulIndex.get();
SearchContext rankingContext =
SearchContext.create(indexReader, hasher, request, getSearchSet(request));
IndexQueryExecution queryExecution =
new IndexQueryExecution(indexReader, rankingService, rankingContext, nodeId);
return queryExecution.run();
}
catch (Exception ex) {
logger.error("Error in handling request", ex);
@@ -129,11 +115,6 @@ public class IndexGrpcService
}
});
// Prometheus bookkeeping
wmsa_query_cost
.labels(nodeName, "GRPC")
.set(params.getDataCost());
if (System.currentTimeMillis() >= endTime) {
wmsa_query_timeouts
.labels(nodeName, "GRPC")
@@ -153,11 +134,36 @@ public class IndexGrpcService
}
}
/** Keywords are translated to a numeric format via a 64 bit hash algorithm,
* which varies depending on the language.
*/
private KeywordHasher findHasher(RpcIndexQuery request) {
KeywordHasher hasher = keywordHasherByLangIso.get(request.getLangIsoCode());
if (hasher != null)
return hasher;
hasher = keywordHasherByLangIso.get("en");
if (hasher != null)
return hasher;
throw new IllegalStateException("Could not find fallback keyword hasher for iso code 'en'");
}
// exists for test access
public List<RpcDecoratedResultItem> justQuery(SearchSpecification specsSet) {
try {
return executeSearch(new SearchParameters(specsSet, getSearchSet(specsSet)));
if (!statefulIndex.isLoaded()) {
// Short-circuit if the index is not loaded, as we trivially know that there can be no results
return List.of();
}
CombinedIndexReader currentIndex = statefulIndex.get();
SearchContext context = SearchContext.create(currentIndex,
keywordHasherByLangIso.get("en"), specsSet, getSearchSet(specsSet));
return new IndexQueryExecution(currentIndex, rankingService, context, 1).run();
}
catch (Exception ex) {
logger.error("Error in handling request", ex);
@@ -183,262 +189,6 @@ public class IndexGrpcService
return searchSetsService.getSearchSetByName(request.getSearchSetIdentifier());
}
// accessible for tests
public List<RpcDecoratedResultItem> executeSearch(SearchParameters params) throws Exception {
if (!statefulIndex.isLoaded()) {
// Short-circuit if the index is not loaded, as we trivially know that there can be no results
return List.of();
}
ResultRankingContext rankingContext = createRankingContext(params.rankingParams,
params.compiledQuery,
params.compiledQueryIds);
var queryExecution = new QueryExecution(rankingContext, params.fetchSize);
List<RpcDecoratedResultItem> ret = queryExecution.run(params);
wmsa_index_query_exec_block_time
.labels(nodeName)
.set(queryExecution.getBlockTime() / 1000.);
wmsa_index_query_exec_stall_time
.labels(nodeName)
.set(queryExecution.getStallTime() / 1000.);
return ret;
}
/** This class is responsible for ranking the results and adding the best results to the
* resultHeap, which depending on the state of the indexLookup threads may or may not block
*/
private ResultRankingContext createRankingContext(RpcResultRankingParameters rankingParams,
CompiledQuery<String> compiledQuery,
CompiledQueryLong compiledQueryIds)
{
int[] full = new int[compiledQueryIds.size()];
int[] prio = new int[compiledQueryIds.size()];
BitSet ngramsMask = new BitSet(compiledQuery.size());
BitSet regularMask = new BitSet(compiledQuery.size());
var currentIndex = statefulIndex.get();
for (int idx = 0; idx < compiledQueryIds.size(); idx++) {
long id = compiledQueryIds.at(idx);
full[idx] = currentIndex.numHits(id);
prio[idx] = currentIndex.numHitsPrio(id);
if (compiledQuery.at(idx).contains("_")) {
ngramsMask.set(idx);
}
else {
regularMask.set(idx);
}
}
return new ResultRankingContext(currentIndex.totalDocCount(),
rankingParams,
ngramsMask,
regularMask,
new CqDataInt(full),
new CqDataInt(prio));
}
/** This class is responsible for executing a search query. It uses a thread pool to
* execute the subqueries and their valuation in parallel. The results are then combined
* into a bounded priority queue, and finally the best results are returned.
*/
private class QueryExecution {
private static final Executor workerPool = Executors.newCachedThreadPool();
/** The queue where the results from the index lookup threads are placed,
* pending ranking by the result ranker threads */
private final ArrayBlockingQueue<CombinedDocIdList> resultCandidateQueue
= new ArrayBlockingQueue<>(64);
private final ResultPriorityQueue resultHeap;
private final ResultRankingContext resultRankingContext;
private final AtomicInteger remainingIndexTasks = new AtomicInteger(0);
private final AtomicInteger remainingValuationTasks = new AtomicInteger(0);
private final AtomicLong blockTime = new AtomicLong(0);
private final AtomicLong stallTime = new AtomicLong(0);
public long getStallTime() {
return stallTime.get();
}
public long getBlockTime() {
return blockTime.get();
}
private QueryExecution(ResultRankingContext resultRankingContext, int maxResults) {
this.resultRankingContext = resultRankingContext;
this.resultHeap = new ResultPriorityQueue(maxResults);
}
/** Execute a search query */
public List<RpcDecoratedResultItem> run(SearchParameters parameters) throws Exception {
var terms = new SearchTerms(parameters.query, parameters.compiledQueryIds);
var currentIndex = statefulIndex.get();
for (var indexQuery : currentIndex.createQueries(terms, parameters.queryParams)) {
workerPool.execute(new IndexLookup(indexQuery, parameters.budget));
}
for (int i = 0; i < indexValuationThreads; i++) {
workerPool.execute(new ResultRanker(parameters, resultRankingContext));
}
// Wait for all tasks to complete
awaitCompletion();
// Return the best results
return resultValuator.selectBestResults(parameters, resultRankingContext, resultHeap);
}
/** Wait for all tasks to complete */
private void awaitCompletion() throws InterruptedException {
synchronized (remainingValuationTasks) {
while (remainingValuationTasks.get() > 0) {
remainingValuationTasks.wait(20);
}
}
}
/** This class is responsible for executing a subquery and adding the results to the
* resultCandidateQueue, which depending on the state of the valuator threads may
* or may not block */
class IndexLookup implements Runnable {
private final IndexQuery query;
private final IndexSearchBudget budget;
IndexLookup(IndexQuery query,
IndexSearchBudget budget) {
this.query = query;
this.budget = budget;
remainingIndexTasks.incrementAndGet();
}
public void run() {
try {
executeSearch();
}
catch (Exception ex) {
logger.error("Error in index lookup", ex);
}
finally {
synchronized (remainingIndexTasks) {
if (remainingIndexTasks.decrementAndGet() == 0) {
remainingIndexTasks.notifyAll();
}
}
}
}
private void executeSearch() {
final LongArrayList results = new LongArrayList(16);
// These queries are different indices for one subquery
final LongQueryBuffer buffer = new LongQueryBuffer(4096);
while (query.hasMore() && budget.hasTimeLeft())
{
buffer.reset();
query.getMoreResults(buffer);
for (int i = 0; i < buffer.end; i+=16) {
for (int j = 0; j < Math.min(buffer.end - i, 16); j++) {
results.add(buffer.data.get(i+j));
}
enqueueResults(new CombinedDocIdList(results));
results.clear();
}
}
buffer.dispose();
}
private void enqueueResults(CombinedDocIdList resultIds) {
long remainingTime = budget.timeLeft();
try {
if (!resultCandidateQueue.offer(resultIds)) {
long start = System.currentTimeMillis();
resultCandidateQueue.offer(resultIds, remainingTime, TimeUnit.MILLISECONDS);
blockTime.addAndGet(System.currentTimeMillis() - start);
}
}
catch (InterruptedException e) {
logger.warn("Interrupted while waiting to offer resultIds to queue", e);
}
}
}
class ResultRanker implements Runnable {
private final SearchParameters parameters;
private final ResultRankingContext rankingContext;
ResultRanker(SearchParameters parameters, ResultRankingContext rankingContext) {
this.parameters = parameters;
this.rankingContext = rankingContext;
remainingValuationTasks.incrementAndGet();
}
public void run() {
try {
while (parameters.budget.timeLeft() > 0 && execute());
}
catch (InterruptedException e) {
logger.warn("Interrupted while waiting to poll resultIds from queue", e);
}
catch (Exception e) {
logger.error("Exception while ranking results", e);
}
finally {
synchronized (remainingValuationTasks) {
if (remainingValuationTasks.decrementAndGet() == 0)
remainingValuationTasks.notifyAll();
}
}
}
private boolean execute() throws Exception {
long start = System.currentTimeMillis();
// Do a relatively short poll to ensure we terminate in a timely manner
// in the event all work is done
final long pollTime = Math.clamp(parameters.budget.timeLeft(), 1, 5);
CombinedDocIdList resultIds = resultCandidateQueue.poll(pollTime, TimeUnit.MILLISECONDS);
if (resultIds == null) {
// check if we are done and can terminate
if (remainingIndexTasks.get() == 0 && resultCandidateQueue.isEmpty()) {
return false;
}
}
else {
stallTime.addAndGet(System.currentTimeMillis() - start);
resultHeap.addAll(
resultValuator.rankResults(parameters, false, rankingContext, resultIds)
);
}
return true; // keep going
}
}
}
}


@@ -1,14 +1,14 @@
package nu.marginalia.index.index;
package nu.marginalia.index;
import java.util.List;
import gnu.trove.set.hash.TLongHashSet;
import nu.marginalia.index.FullReverseIndexReader;
import nu.marginalia.index.query.IndexQuery;
import nu.marginalia.index.query.IndexQueryBuilder;
import nu.marginalia.index.query.filter.QueryFilterAnyOf;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.index.reverse.FullReverseIndexReader;
import nu.marginalia.index.reverse.IndexLanguageContext;
import nu.marginalia.index.reverse.query.IndexQuery;
import nu.marginalia.index.reverse.query.IndexSearchBudget;
import nu.marginalia.index.reverse.query.filter.QueryFilterStepIf;
public class IndexQueryBuilderImpl implements IndexQueryBuilder {
public class IndexQueryBuilder {
private final IndexLanguageContext context;
private final IndexQuery query;
private final FullReverseIndexReader reverseIndexFullReader;
@@ -20,8 +20,9 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
* */
private final TLongHashSet alreadyConsideredTerms = new TLongHashSet();
IndexQueryBuilderImpl(FullReverseIndexReader reverseIndexFullReader, IndexQuery query)
IndexQueryBuilder(FullReverseIndexReader reverseIndexFullReader, IndexLanguageContext context, IndexQuery query)
{
this.context = context;
this.query = query;
this.reverseIndexFullReader = reverseIndexFullReader;
}
@@ -32,18 +33,18 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
return this;
}
public IndexQueryBuilder also(long termId) {
public IndexQueryBuilder also(long termId, IndexSearchBudget budget) {
if (alreadyConsideredTerms.add(termId)) {
query.addInclusionFilter(reverseIndexFullReader.also(termId));
query.addInclusionFilter(reverseIndexFullReader.also(context, termId, budget));
}
return this;
}
public IndexQueryBuilder not(long termId) {
public IndexQueryBuilder not(long termId, IndexSearchBudget budget) {
query.addInclusionFilter(reverseIndexFullReader.not(termId));
query.addInclusionFilter(reverseIndexFullReader.not(context, termId, budget));
return this;
}
@@ -55,20 +56,6 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
return this;
}
public IndexQueryBuilder addInclusionFilterAny(List<QueryFilterStepIf> filterSteps) {
if (filterSteps.isEmpty())
return this;
if (filterSteps.size() == 1) {
query.addInclusionFilter(filterSteps.getFirst());
}
else {
query.addInclusionFilter(new QueryFilterAnyOf(filterSteps));
}
return this;
}
public IndexQuery build() {
return query;
}


@@ -0,0 +1,252 @@
package nu.marginalia.index;
import io.prometheus.client.Gauge;
import nu.marginalia.api.searchquery.RpcDecoratedResultItem;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.index.model.CombinedDocIdList;
import nu.marginalia.index.model.SearchContext;
import nu.marginalia.index.results.IndexResultRankingService;
import nu.marginalia.index.reverse.query.IndexQuery;
import nu.marginalia.index.reverse.query.IndexSearchBudget;
import nu.marginalia.skiplist.SkipListConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.*;
/** Performs an index query */
public class IndexQueryExecution {
private static final int indexValuationThreads = Integer.getInteger("index.valuationThreads", 8);
private static final int indexPreparationThreads = Integer.getInteger("index.preparationThreads", 2);
// Since most NVMe drives have a maximum read size of 128 KB, and most small reads are 512 B,
// this should probably be 128*1024 / 512 = 256 to reduce queue depth and optimize tail latency
private static final int evaluationBatchSize = 256;
// This should probably be SkipListConstants.BLOCK_SIZE / 16 in order to reduce the number of unnecessary read
// operations per lookup and again optimize tail latency
private static final int lookupBatchSize = SkipListConstants.BLOCK_SIZE / 16;
private static final ExecutorService threadPool =
new ThreadPoolExecutor(indexValuationThreads, 256,
60L, TimeUnit.SECONDS, new SynchronousQueue<>());
private static final Logger log = LoggerFactory.getLogger(IndexQueryExecution.class);
private final String nodeName;
private final IndexResultRankingService rankingService;
private final SearchContext rankingContext;
private final List<IndexQuery> queries;
private final IndexSearchBudget budget;
private final ResultPriorityQueue resultHeap;
private final CountDownLatch lookupCountdown;
private final CountDownLatch preparationCountdown;
private final CountDownLatch rankingCountdown;
private final ArrayBlockingQueue<CombinedDocIdList> fullPreparationQueue = new ArrayBlockingQueue<>(1);
private final ArrayBlockingQueue<CombinedDocIdList> priorityPreparationQueue = new ArrayBlockingQueue<>(1);
private final ArrayBlockingQueue<IndexResultRankingService.RankingData> fullEvaluationQueue = new ArrayBlockingQueue<>(32);
private final ArrayBlockingQueue<IndexResultRankingService.RankingData> priorityEvaluationQueue = new ArrayBlockingQueue<>(32);
private final int limitTotal;
private final int limitByDomain;
private static final Gauge metric_index_lookup_time_s = Gauge.build()
.labelNames("node")
.name("index_exec_lookup_time_s")
.help("Time in query spent on lookups")
.register();
private static final Gauge metric_index_prep_time_s = Gauge.build()
.labelNames("node")
.name("index_exec_prep_time_s")
.help("Time in query spent retrieving positions and spans")
.register();
private static final Gauge metric_index_rank_time_s = Gauge.build()
.labelNames("node")
.name("index_exec_ranking_time_s")
.help("Time in query spent on ranking")
.register();
private static final Gauge metric_index_documents_ranked = Gauge.build()
.labelNames("node")
.name("index_exec_documents_ranked")
.help("Number of documents ranked")
.register();
public IndexQueryExecution(CombinedIndexReader currentIndex,
IndexResultRankingService rankingService,
SearchContext rankingContext,
int serviceNode) {
this.nodeName = Integer.toString(serviceNode);
this.rankingService = rankingService;
this.rankingContext = rankingContext;
resultHeap = new ResultPriorityQueue(rankingContext.fetchSize);
budget = rankingContext.budget;
limitByDomain = rankingContext.limitByDomain;
limitTotal = rankingContext.limitTotal;
queries = currentIndex.createQueries(rankingContext);
lookupCountdown = new CountDownLatch(queries.size());
preparationCountdown = new CountDownLatch(indexPreparationThreads * 2);
rankingCountdown = new CountDownLatch(indexValuationThreads * 2);
}
public List<RpcDecoratedResultItem> run() throws InterruptedException, SQLException {
for (IndexQuery query : queries) {
threadPool.submit(() -> lookup(query));
}
for (int i = 0; i < indexPreparationThreads; i++) {
threadPool.submit(() -> prepare(priorityPreparationQueue, priorityEvaluationQueue));
threadPool.submit(() -> prepare(fullPreparationQueue, fullEvaluationQueue));
}
// Spawn lookup tasks for each query
for (int i = 0; i < indexValuationThreads; i++) {
threadPool.submit(() -> evaluate(priorityEvaluationQueue));
threadPool.submit(() -> evaluate(fullEvaluationQueue));
}
// Await lookup task termination
lookupCountdown.await();
preparationCountdown.await();
rankingCountdown.await();
// Deallocate any leftover ranking data buffers
for (var data : priorityEvaluationQueue) {
data.close();
}
for (var data : fullEvaluationQueue) {
data.close();
}
metric_index_documents_ranked
.labels(nodeName)
.inc(1000. * resultHeap.getItemsProcessed() / budget.getLimitTime());
// Final result selection
return rankingService.selectBestResults(limitByDomain, limitTotal, rankingContext, resultHeap.toList());
}
private List<Future<?>> lookup(IndexQuery query) {
final LongQueryBuffer buffer = new LongQueryBuffer(lookupBatchSize);
List<Future<?>> evaluationJobs = new ArrayList<>();
try {
while (query.hasMore() && budget.hasTimeLeft()) {
buffer.zero();
long st = System.nanoTime();
query.getMoreResults(buffer);
long et = System.nanoTime();
metric_index_lookup_time_s
.labels(nodeName)
.inc((et - st)/1_000_000_000.);
if (buffer.isEmpty())
continue;
var queue = query.isPrioritized() ? priorityPreparationQueue : fullPreparationQueue;
if (buffer.end <= evaluationBatchSize) {
var docIds = new CombinedDocIdList(buffer);
if (!queue.offer(docIds, Math.max(1, budget.timeLeft()), TimeUnit.MILLISECONDS))
break;
}
else {
long[] bufferData = buffer.copyData();
for (int start = 0; start < bufferData.length; start+= evaluationBatchSize) {
long[] slice = Arrays.copyOfRange(bufferData, start,
Math.min(start + evaluationBatchSize, bufferData.length));
var docIds = new CombinedDocIdList(slice);
if (!queue.offer(docIds, Math.max(1, budget.timeLeft()), TimeUnit.MILLISECONDS))
break;
}
}
}
} catch (RuntimeException | InterruptedException ex) {
log.error("Exception in lookup thread", ex);
} finally {
buffer.dispose();
lookupCountdown.countDown();
}
return evaluationJobs;
}
private void prepare(ArrayBlockingQueue<CombinedDocIdList> inputQueue, ArrayBlockingQueue<IndexResultRankingService.RankingData> outputQueue) {
try {
while (budget.hasTimeLeft() && (lookupCountdown.getCount() > 0 || !inputQueue.isEmpty())) {
var docIds = inputQueue.poll(Math.clamp(budget.timeLeft(), 1, 5), TimeUnit.MILLISECONDS);
if (docIds == null) continue;
long st = System.nanoTime();
var preparedData = rankingService.prepareRankingData(rankingContext, docIds);
long et = System.nanoTime();
metric_index_prep_time_s
.labels(nodeName)
.inc((et - st)/1_000_000_000.);
if (!outputQueue.offer(preparedData, Math.max(1, budget.timeLeft()), TimeUnit.MILLISECONDS))
preparedData.close();
}
} catch (TimeoutException ex) {
// This is normal
} catch (Exception ex) {
if (!(ex.getCause() instanceof InterruptedException)) {
log.error("Exception in lookup thread", ex);
} // suppress logging for interrupted ex
} finally {
preparationCountdown.countDown();
}
}
private void evaluate(ArrayBlockingQueue<IndexResultRankingService.RankingData> queue) {
try {
while (budget.hasTimeLeft() && (preparationCountdown.getCount() > 0 || !queue.isEmpty())) {
var rankingData = queue.poll(Math.clamp(budget.timeLeft(), 1, 5), TimeUnit.MILLISECONDS);
if (rankingData == null) continue;
try (rankingData) {
long st = System.nanoTime();
resultHeap.addAll(rankingService.rankResults(rankingContext, rankingData, false));
long et = System.nanoTime();
metric_index_rank_time_s
.labels(nodeName)
.inc((et - st)/1_000_000_000.);
}
}
} catch (Exception ex) {
if (!(ex.getCause() instanceof InterruptedException)) {
log.error("Exception in lookup thread", ex);
} // suppress logging for interrupted ex
} finally {
rankingCountdown.countDown();
}
}
public int itemsProcessed() {
return resultHeap.getItemsProcessed();
}
}
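// A minimal, self-contained sketch of the coordination pattern used above: a lookup stage
// feeding bounded ArrayBlockingQueues, worker stages that drain them until the upstream
// CountDownLatch reaches zero and the queue is empty, and a final await on all latches.
// Everything here (PipelineSketch, the queue capacity, the batch values) is illustrative
// and not part of the MarginaliaSearch codebase.
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

class PipelineSketch {
    public static void main(String[] args) throws InterruptedException {
        ArrayBlockingQueue<long[]> prepared = new ArrayBlockingQueue<>(8);
        CountDownLatch lookupDone = new CountDownLatch(1);
        CountDownLatch rankDone = new CountDownLatch(2);
        ExecutorService pool = Executors.newFixedThreadPool(3);

        // "lookup" stage: produces batches of candidate ids
        pool.submit(() -> {
            try {
                for (int batch = 0; batch < 4; batch++) {
                    prepared.offer(new long[]{batch, batch + 1}, 100, TimeUnit.MILLISECONDS);
                }
            } catch (InterruptedException ignored) {
            } finally {
                lookupDone.countDown();
            }
        });

        // "ranking" stage: two workers drain the queue; the loop condition mirrors
        // (lookupCountdown.getCount() > 0 || !inputQueue.isEmpty()) in the class above
        for (int i = 0; i < 2; i++) {
            pool.submit(() -> {
                try {
                    while (lookupDone.getCount() > 0 || !prepared.isEmpty()) {
                        long[] batch = prepared.poll(5, TimeUnit.MILLISECONDS);
                        if (batch == null) continue;
                        System.out.println("ranked a batch of " + batch.length + " ids");
                    }
                } catch (InterruptedException ignored) {
                } finally {
                    rankDone.countDown();
                }
            });
        }

        lookupDone.await();
        rankDone.await();
        pool.shutdown();
    }
}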


@@ -1,5 +1,6 @@
package nu.marginalia.index;
import com.google.common.collect.MinMaxPriorityQueue;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import org.jetbrains.annotations.NotNull;
@@ -9,108 +10,52 @@ import java.util.*;
/** A priority queue for search results. This class is not thread-safe,
* in general, except for concurrent use of the addAll method.
* <p></p>
* The class implements a subset of the Collection interface, and
* is intended to be used as a priority queue for search results,
* with a maximum size.
* <p></p>
* Since the expected use case is to add a large number of items
* and then iterate over the items, the class is optimized for
* this scenario, and does not implement other mutating methods
* than addAll().
*/
public class ResultPriorityQueue implements Iterable<SearchResultItem>,
Collection<SearchResultItem> {
private final int limit;
private final ArrayList<SearchResultItem> backingList = new ArrayList<>();
public class ResultPriorityQueue implements Iterable<SearchResultItem> {
private final LongOpenHashSet idsInSet = new LongOpenHashSet();
private final MinMaxPriorityQueue<SearchResultItem> queue;
private int itemsProcessed = 0;
public ResultPriorityQueue(int limit) {
this.limit = limit;
this.queue = MinMaxPriorityQueue.<SearchResultItem>orderedBy(Comparator.naturalOrder()).maximumSize(limit).create();
}
public Iterator<SearchResultItem> iterator() {
return backingList.iterator();
}
@NotNull
@Override
public Object[] toArray() {
return backingList.toArray();
}
@NotNull
@Override
public <T> T[] toArray(@NotNull T[] a) {
return backingList.toArray(a);
}
@Override
public boolean add(SearchResultItem searchResultItem) {
throw new UnsupportedOperationException("Use addAll instead");
}
@Override
public boolean remove(Object o) {
throw new UnsupportedOperationException();
}
@Override
public boolean containsAll(@NotNull Collection<?> c) {
return idsInSet.containsAll(c);
public @NotNull Iterator<SearchResultItem> iterator() {
return queue.iterator();
}
/** Adds all items to the queue, and returns true if any items were added.
* This is a thread-safe operation.
*/
@Override
public synchronized boolean addAll(@NotNull Collection<? extends SearchResultItem> items) {
boolean itemsAdded = false;
for (var item: items) {
if (idsInSet.add(item.getDocumentId())) {
backingList.add(item);
itemsAdded = true;
}
}
if (!itemsAdded) {
return false;
}
itemsProcessed+=items.size();
backingList.sort(Comparator.naturalOrder());
if (backingList.size() > limit) {
backingList.subList(limit, backingList.size()).clear();
for (var item : items) {
if (idsInSet.add(item.getDocumentId())) {
queue.add(item);
}
}
return true;
}
@Override
public boolean removeAll(@NotNull Collection<?> c) {
throw new UnsupportedOperationException();
}
@Override
public boolean retainAll(@NotNull Collection<?> c) {
throw new UnsupportedOperationException();
}
@Override
public void clear() {
backingList.clear();
idsInSet.clear();
public synchronized List<SearchResultItem> toList() {
return new ArrayList<>(queue);
}
public int size() {
return backingList.size();
return queue.size();
}
public int getItemsProcessed() {
return itemsProcessed;
}
@Override
public boolean isEmpty() {
return backingList.isEmpty();
}
@Override
public boolean contains(Object o) {
return backingList.contains(o);
return queue.isEmpty();
}
}
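// A minimal sketch of the mechanism the class above combines: a Guava MinMaxPriorityQueue
// capped at a maximum size plus a LongOpenHashSet that drops duplicate document ids.
// The element type (plain Long) and the id values are illustrative only.
import com.google.common.collect.MinMaxPriorityQueue;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import java.util.Comparator;

class BoundedDedupQueueSketch {
    public static void main(String[] args) {
        int limit = 3;
        LongOpenHashSet seen = new LongOpenHashSet();
        MinMaxPriorityQueue<Long> queue = MinMaxPriorityQueue
                .<Long>orderedBy(Comparator.naturalOrder())
                .maximumSize(limit)
                .create();

        long[] incoming = {42, 17, 42, 99, 5, 17};
        for (long id : incoming) {
            if (seen.add(id)) {   // duplicates are rejected here
                queue.add(id);    // the worst element is evicted once the cap is exceeded
            }
        }
        System.out.println(queue); // at most `limit` elements remain
    }
}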


@@ -1,8 +1,7 @@
package nu.marginalia.index.index;
package nu.marginalia.index;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.index.IndexFactory;
import nu.marginalia.service.control.ServiceEventLog;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
@@ -35,6 +34,13 @@ public class StatefulIndex {
this.eventLog = eventLog;
}
/** For use in testing only */
public StatefulIndex(CombinedIndexReader combinedIndexReader) {
this.combinedIndexReader = combinedIndexReader;
this.servicesFactory = null;
this.eventLog = null;
}
public void init() {
Lock lock = indexReplacementLock.writeLock();


@@ -1,4 +1,4 @@
package nu.marginalia.index.forward;
package nu.marginalia.index.config;
public class ForwardIndexParameters {
public static final int ENTRY_SIZE = 3;


@@ -0,0 +1,97 @@
package nu.marginalia.index.config;
import nu.marginalia.language.config.LanguageConfiguration;
import nu.marginalia.language.model.LanguageDefinition;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
public sealed interface IndexFileName {
enum Version {
CURRENT, NEXT
}
record FullWords(String languageIsoCode) implements IndexFileName {}
record FullDocs() implements IndexFileName {}
record FullPositions() implements IndexFileName {}
record PrioWords(String languageIsoCode) implements IndexFileName {}
record PrioDocs() implements IndexFileName {}
record ForwardDocIds() implements IndexFileName { }
record ForwardDocData() implements IndexFileName { }
record ForwardSpansData() implements IndexFileName { }
static List<IndexFileName> revFullIndexFiles(LanguageConfiguration languageConfiguration) {
List<IndexFileName> ret = new ArrayList<>();
ret.add(new FullDocs());
ret.add(new FullPositions());
for (LanguageDefinition ld : languageConfiguration.languages()) {
ret.add(new FullWords(ld.isoCode()));
}
return ret;
}
static List<IndexFileName> revPrioIndexFiles(LanguageConfiguration languageConfiguration) {
List<IndexFileName> ret = new ArrayList<>();
ret.add(new PrioDocs());
for (LanguageDefinition ld : languageConfiguration.languages()) {
ret.add(new PrioWords(ld.isoCode()));
}
return ret;
}
static List<IndexFileName> forwardIndexFiles() {
return List.of(
new ForwardDocData(),
new ForwardDocIds(),
new ForwardSpansData()
);
}
static Path resolve(Path basePath, IndexFileName fileName, Version version) {
return switch (fileName) {
case FullWords(String isoCode) -> switch (version) {
case CURRENT -> basePath.resolve("rev-words-%s.dat".formatted(isoCode));
case NEXT -> basePath.resolve("rev-words-%s.dat.next".formatted(isoCode));
};
case FullDocs() -> switch (version) {
case CURRENT -> basePath.resolve("rev-docs.dat");
case NEXT -> basePath.resolve("rev-docs.dat.next");
};
case FullPositions() -> switch (version) {
case CURRENT -> basePath.resolve("rev-positions.dat");
case NEXT -> basePath.resolve("rev-positions.dat.next");
};
case PrioWords(String languageIsoCode) -> switch (version) {
case CURRENT -> basePath.resolve("rev-prio-words-%s.dat".formatted(languageIsoCode));
case NEXT -> basePath.resolve("rev-prio-words-%s.dat.next".formatted(languageIsoCode));
};
case PrioDocs() -> switch (version) {
case CURRENT -> basePath.resolve("rev-prio-docs.dat");
case NEXT -> basePath.resolve("rev-prio-docs.dat.next");
};
case ForwardDocIds() -> switch (version) {
case CURRENT -> basePath.resolve("fwd-doc-ids.dat");
case NEXT -> basePath.resolve("fwd-doc-ids.dat.next");
};
case ForwardDocData() -> switch (version) {
case CURRENT -> basePath.resolve("fwd-doc-data.dat");
case NEXT -> basePath.resolve("fwd-doc-data.dat.next");
};
case ForwardSpansData() -> switch (version) {
case CURRENT -> basePath.resolve("fwd-spans.dat");
case NEXT -> basePath.resolve("fwd-spans.dat.next");
};
};
}
}
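// A small usage sketch of the naming scheme above: resolve() maps each logical index file
// (per-language for the keyword lexicons) to a concrete path, with Version.NEXT giving the
// ".next" variant (presumably the file staged during index construction). The base directory
// and the "en" iso code are illustrative values only.
import nu.marginalia.index.config.IndexFileName;

import java.nio.file.Path;

class IndexFileNameSketch {
    public static void main(String[] args) {
        Path base = Path.of("/var/lib/index");

        Path current = IndexFileName.resolve(base, new IndexFileName.FullWords("en"), IndexFileName.Version.CURRENT);
        Path next = IndexFileName.resolve(base, new IndexFileName.FullWords("en"), IndexFileName.Version.NEXT);

        System.out.println(current); // .../rev-words-en.dat
        System.out.println(next);    // .../rev-words-en.dat.next
    }
}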


@@ -0,0 +1,9 @@
package nu.marginalia.index.config;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
public class ReverseIndexParameters
{
public static final BTreeContext wordsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_512);
}


@@ -1,9 +1,13 @@
package nu.marginalia.index.forward;
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.ffi.LinuxSystemCalls;
import nu.marginalia.index.forward.spans.DocumentSpans;
import nu.marginalia.index.forward.spans.ForwardIndexSpansReader;
import nu.marginalia.index.forward.spans.IndexSpansReader;
import nu.marginalia.index.model.CombinedDocIdList;
import nu.marginalia.index.reverse.query.IndexSearchBudget;
import nu.marginalia.model.id.UrlIdCodec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -12,8 +16,9 @@ import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.TimeoutException;
import static nu.marginalia.index.forward.ForwardIndexParameters.*;
import static nu.marginalia.index.config.ForwardIndexParameters.*;
/** Reads the forward index.
* <p/>
@@ -22,16 +27,15 @@ import static nu.marginalia.index.forward.ForwardIndexParameters.*;
* and a mapping between document identifiers to the index into the
* data array.
* <p/>
* Since the total data is relatively small, this is kept in memory to
* reduce the amount of disk thrashing.
* <p/>
* The metadata is a binary encoding of {@see nu.marginalia.idx.DocumentMetadata}
*/
public class ForwardIndexReader {
private final LongArray ids;
private final LongArray data;
private final ForwardIndexSpansReader spansReader;
private volatile Long2IntOpenHashMap idsMap;
private final IndexSpansReader spansReader;
private final Logger logger = LoggerFactory.getLogger(getClass());
@@ -64,7 +68,22 @@ public class ForwardIndexReader {
ids = loadIds(idsFile);
data = loadData(dataFile);
spansReader = new ForwardIndexSpansReader(spansFile);
LinuxSystemCalls.madviseRandom(data.getMemorySegment());
LinuxSystemCalls.madviseRandom(ids.getMemorySegment());
spansReader = IndexSpansReader.open(spansFile);
Thread.ofPlatform().start(this::createIdsMap);
}
private void createIdsMap() {
Long2IntOpenHashMap idsMap = new Long2IntOpenHashMap((int) ids.size());
for (int i = 0; i < ids.size(); i++) {
idsMap.put(ids.get(i), i);
}
this.idsMap = idsMap;
logger.info("Forward index loaded into RAM");
}
private static LongArray loadIds(Path idsFile) throws IOException {
@@ -106,7 +125,11 @@ public class ForwardIndexReader {
private int idxForDoc(long docId) {
assert UrlIdCodec.getRank(docId) == 0 : "Forward Index Reader fed dirty reverse index id";
long offset = ids.binarySearch(docId, 0, ids.size());
if (idsMap != null) {
return idsMap.getOrDefault(docId, -1);
}
long offset = ids.binarySearch2(docId, 0, ids.size());
if (offset >= ids.size() || offset < 0 || ids.get(offset) != docId) {
if (getClass().desiredAssertionStatus()) {
@@ -118,22 +141,29 @@ public class ForwardIndexReader {
return (int) offset;
}
public DocumentSpans getDocumentSpans(Arena arena, long docId) {
long offset = idxForDoc(docId);
if (offset < 0) return new DocumentSpans();
public DocumentSpans[] getDocumentSpans(Arena arena, IndexSearchBudget budget, CombinedDocIdList combinedIds) throws TimeoutException {
long[] offsets = new long[combinedIds.size()];
long encodedOffset = data.get(ENTRY_SIZE * offset + SPANS_OFFSET);
for (int i = 0; i < offsets.length; i++) {
long docId = UrlIdCodec.removeRank(combinedIds.at(i));
long offset = idxForDoc(docId);
if (offset >= 0) {
offsets[i] = data.get(ENTRY_SIZE * offset + SPANS_OFFSET);
}
else {
offsets[i] = -1;
}
}
try {
return spansReader.readSpans(arena, encodedOffset);
return spansReader.readSpans(arena, budget, offsets);
}
catch (IOException ex) {
logger.error("Failed to read spans for doc " + docId, ex);
return new DocumentSpans();
logger.error("Failed to read spans for docIds", ex);
return new DocumentSpans[offsets.length];
}
}
public int totalDocCount() {
return (int) ids.size();
}
@@ -141,6 +171,8 @@ public class ForwardIndexReader {
public void close() {
if (data != null)
data.close();
if (ids != null)
ids.close();
}
public boolean isLoaded() {


@@ -3,10 +3,10 @@ package nu.marginalia.index.forward.construction;
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.index.domainrankings.DomainRankings;
import nu.marginalia.index.forward.ForwardIndexParameters;
import nu.marginalia.index.forward.spans.ForwardIndexSpansWriter;
import nu.marginalia.index.config.ForwardIndexParameters;
import nu.marginalia.index.forward.spans.IndexSpansWriter;
import nu.marginalia.index.journal.IndexJournal;
import nu.marginalia.index.searchset.DomainRankings;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginalia.process.control.ProcessHeartbeat;
@@ -65,7 +65,7 @@ public class ForwardIndexConverter {
logger.info("Domain Rankings size = {}", domainRankings.size());
try (var progress = heartbeat.createProcessTaskHeartbeat(TaskSteps.class, "forwardIndexConverter");
var spansWriter = new ForwardIndexSpansWriter(outputFileSpansData)
var spansWriter = new IndexSpansWriter(outputFileSpansData)
) {
progress.progress(TaskSteps.GET_DOC_IDS);


@@ -11,6 +11,9 @@ public class DocumentSpan {
/** A list of the interlaced start and end positions of each span in the document of this type */
private final IntList startsEnds;
public DocumentSpan(IntList startsEnds) {
this.startsEnds = startsEnds;
}
public DocumentSpan(CodedSequence startsEnds) {
this.startsEnds = startsEnds.values();
}


@@ -1,5 +1,6 @@
package nu.marginalia.index.forward.spans;
import it.unimi.dsi.fastutil.ints.IntList;
import nu.marginalia.language.sentence.tag.HtmlTag;
import nu.marginalia.sequence.CodedSequence;
@@ -39,6 +40,23 @@ public class DocumentSpans {
return EMPTY_SPAN;
}
void accept(byte code, IntList positions) {
if (code == HtmlTag.HEADING.code)
this.heading = new DocumentSpan(positions);
else if (code == HtmlTag.TITLE.code)
this.title = new DocumentSpan(positions);
else if (code == HtmlTag.NAV.code)
this.nav = new DocumentSpan(positions);
else if (code == HtmlTag.CODE.code)
this.code = new DocumentSpan(positions);
else if (code == HtmlTag.ANCHOR.code)
this.anchor = new DocumentSpan(positions);
else if (code == HtmlTag.EXTERNAL_LINKTEXT.code)
this.externalLinkText = new DocumentSpan(positions);
else if (code == HtmlTag.BODY.code)
this.body = new DocumentSpan(positions);
}
void accept(byte code, CodedSequence positions) {
if (code == HtmlTag.HEADING.code)
this.heading = new DocumentSpan(positions);


@@ -0,0 +1,24 @@
package nu.marginalia.index.forward.spans;
import nu.marginalia.index.reverse.query.IndexSearchBudget;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.file.Path;
import java.util.concurrent.TimeoutException;
public interface IndexSpansReader extends AutoCloseable {
DocumentSpans[] readSpans(Arena arena, IndexSearchBudget budget, long[] encodedOffsets) throws TimeoutException, IOException;
static IndexSpansReader open(Path fileName) throws IOException {
int version = SpansCodec.parseSpanFilesFooter(fileName);
if (version == SpansCodec.SpansCodecVersion.PLAIN.ordinal()) {
return new IndexSpansReaderPlain(fileName);
}
else {
throw new IllegalArgumentException("Unsupported spans file version: " + version);
}
}
void close() throws IOException;
}


@@ -0,0 +1,100 @@
package nu.marginalia.index.forward.spans;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import nu.marginalia.index.reverse.query.IndexSearchBudget;
import nu.marginalia.uring.UringFileReader;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.TimeoutException;
public class IndexSpansReaderPlain implements IndexSpansReader {
private final UringFileReader uringReader;
public IndexSpansReaderPlain(Path spansFile) throws IOException {
if (Boolean.getBoolean("index.directModePositionsSpans")) {
if ((Files.size(spansFile) & 4095) != 0) {
throw new IllegalArgumentException("Spans file is not block aligned in size: " + Files.size(spansFile));
}
uringReader = new UringFileReader(spansFile, true);
}
else {
uringReader = new UringFileReader(spansFile, false);
uringReader.fadviseWillneed();
}
}
@Override
public DocumentSpans[] readSpans(Arena arena, IndexSearchBudget budget, long[] encodedOffsets) throws TimeoutException {
int readCnt = 0;
for (long offset : encodedOffsets) {
if (offset < 0) continue;
readCnt ++;
}
if (readCnt == 0) {
return new DocumentSpans[encodedOffsets.length];
}
long[] offsets = new long[readCnt];
int[] sizes = new int[readCnt];
for (int idx = 0, j = 0; idx < encodedOffsets.length; idx++) {
if (encodedOffsets[idx] < 0)
continue;
long offset = encodedOffsets[idx];
offsets[j] = SpansCodec.decodeStartOffset(offset);
sizes[j] = SpansCodec.decodeSize(offset);
j++;
}
List<MemorySegment> buffers = uringReader.readUnaligned(arena, budget.timeLeft(), offsets, sizes, 4096);
DocumentSpans[] ret = new DocumentSpans[encodedOffsets.length];
for (int idx = 0, j = 0; idx < encodedOffsets.length; idx++) {
if (encodedOffsets[idx] < 0)
continue;
ret[idx] = decode(buffers.get(j++));
}
return ret;
}
public DocumentSpans decode(MemorySegment ms) {
int count = ms.get(ValueLayout.JAVA_INT, 0);
int pos = 4;
DocumentSpans ret = new DocumentSpans();
// Decode each span
for (int spanIdx = 0; spanIdx < count; spanIdx++) {
byte code = ms.get(ValueLayout.JAVA_BYTE, pos);
short len = ms.get(ValueLayout.JAVA_SHORT, pos+2);
IntArrayList values = new IntArrayList(len);
pos += 4;
for (int i = 0; i < len; i++) {
values.add(ms.get(ValueLayout.JAVA_INT, pos + 4*i));
}
ret.accept(code, values);
pos += 4*len;
}
return ret;
}
@Override
public void close() throws IOException {
uringReader.close();
}
}
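// A self-contained sketch of the span record layout that decode() above walks and that
// IndexSpansWriter (further down) emits: an int span count, then per span a byte tag code,
// one padding byte, a short position count, and that many ints. The tag value and positions
// here are made up for the example; the real files are read via UringFileReader, not ByteBuffer.
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;

class SpanRecordLayoutSketch {
    public static void main(String[] args) {
        ByteBuffer buf = ByteBuffer.allocate(64).order(ByteOrder.nativeOrder());

        // write one record holding a single span with three positions
        buf.putInt(1);             // span count
        buf.put((byte) 1);         // span tag code (illustrative value)
        buf.put((byte) 0);         // padding byte, keeps the following short aligned
        buf.putShort((short) 3);   // number of positions
        buf.putInt(10).putInt(20).putInt(30);
        buf.flip();

        // read it back the same way decode() walks its MemorySegment
        int count = buf.getInt();
        for (int i = 0; i < count; i++) {
            byte code = buf.get();
            buf.get();             // skip padding
            short len = buf.getShort();
            int[] positions = new int[len];
            for (int j = 0; j < len; j++) positions[j] = buf.getInt();
            System.out.println("code=" + code + " positions=" + Arrays.toString(positions));
        }
    }
}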


@@ -0,0 +1,76 @@
package nu.marginalia.index.forward.spans;
import nu.marginalia.sequence.VarintCodedSequence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
public class IndexSpansWriter implements AutoCloseable {
private final FileChannel outputChannel;
private final ByteBuffer work = ByteBuffer.allocate(4*1024*1024).order(ByteOrder.nativeOrder());
private static Logger logger = LoggerFactory.getLogger(IndexSpansWriter.class);
private long stateStartOffset = -1;
private int stateLength = -1;
public IndexSpansWriter(Path outputFileSpansData) throws IOException {
this.outputChannel = (FileChannel) Files.newByteChannel(outputFileSpansData, StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE);
}
public void beginRecord(int count) throws IOException {
stateStartOffset = outputChannel.position();
stateLength = 0;
work.clear();
work.putInt(count);
work.flip();
while (work.hasRemaining())
stateLength += outputChannel.write(work);
}
public void writeSpan(byte spanCode, ByteBuffer sequenceData) throws IOException {
work.clear();
work.put(spanCode);
work.put((byte) 0); // padding byte, so the following short is 2-byte aligned
var sequence = new VarintCodedSequence(sequenceData);
int spanLength = sequence.valueCount();
if (spanLength > 8192) {
logger.warn("Excessive span length with code {}: {}", spanCode, spanLength);
spanLength = 8192;
}
work.putShort((short) spanLength);
var iter = sequence.iterator();
for (int spanIdx = 0; iter.hasNext() && spanIdx < spanLength; spanIdx++) {
work.putInt(iter.nextInt());
}
work.flip();
stateLength += outputChannel.write(work);
}
public long endRecord() {
return SpansCodec.encode(stateStartOffset, stateLength);
}
@Override
public void close() throws IOException {
ByteBuffer footer = SpansCodec.createSpanFilesFooter(SpansCodec.SpansCodecVersion.PLAIN, (int) (4096 - (outputChannel.position() & 4095)));
outputChannel.position(outputChannel.size());
while (footer.hasRemaining()) {
outputChannel.write(footer, outputChannel.size());
}
outputChannel.close();
}
}


@@ -0,0 +1,64 @@
package nu.marginalia.index.forward.spans;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
public class SpansCodec {
public static int MAGIC_INT = 0xF000F000;
public static int FOOTER_SIZE = 8;
public enum SpansCodecVersion {
@Deprecated
DEPRECATED_1, // This must not be removed, the ordinal is used to encode the version
PLAIN
}
public static long encode(long startOffset, long size) {
assert size < 0x1000_0000L : "Size must be less than 2^28";
return startOffset << 28 | (size & 0xFFF_FFFFL);
}
public static long decodeStartOffset(long encoded) {
return encoded >>> 28;
}
public static int decodeSize(long encoded) {
return (int) (encoded & 0x0FFF_FFFFL);
}
public static ByteBuffer createSpanFilesFooter(SpansCodecVersion version, int padSize) {
if (padSize < FOOTER_SIZE) {
padSize += 4096;
}
ByteBuffer footer = ByteBuffer.allocate(padSize);
footer.position(padSize - FOOTER_SIZE);
footer.putInt(SpansCodec.MAGIC_INT);
footer.put((byte) version.ordinal());
footer.put((byte) 0);
footer.put((byte) 0);
footer.put((byte) 0);
footer.flip();
return footer;
}
public static int parseSpanFilesFooter(Path spansFile) throws IOException {
ByteBuffer buffer = ByteBuffer.allocate(FOOTER_SIZE);
try (var fc = FileChannel.open(spansFile, StandardOpenOption.READ)) {
if (fc.size() < FOOTER_SIZE) return 0;
fc.read(buffer, fc.size() - buffer.capacity());
buffer.flip();
int magic = buffer.getInt();
if (magic != MAGIC_INT) {
return 0;
}
return buffer.get();
}
}
}
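// A minimal round-trip sketch of the packing scheme above: encode() stores the record's start
// offset in the upper bits of a long and its (at most 28-bit) size in the lower bits, which
// decodeStartOffset() and decodeSize() recover. The concrete numbers are illustrative only.
import nu.marginalia.index.forward.spans.SpansCodec;

class SpansCodecSketch {
    public static void main(String[] args) {
        long encoded = SpansCodec.encode(123_456L, 789);

        long start = SpansCodec.decodeStartOffset(encoded); // 123456
        int size = SpansCodec.decodeSize(encoded);          // 789

        System.out.println(start + " " + size);
    }
}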


@@ -1,14 +1,14 @@
package nu.marginalia.index.results.model.ids;
package nu.marginalia.index.model;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import org.roaringbitmap.longlong.Roaring64Bitmap;
import nu.marginalia.array.page.LongQueryBuffer;
import java.util.Arrays;
import java.util.stream.LongStream;
/** A list of document ids, with their ranking bits still remaining.
*
* @see nu.marginalia.index.results.model.ids.DocIdList
* @see DocIdList
* @see nu.marginalia.model.id.UrlIdCodec
* */
public final class CombinedDocIdList {
@@ -17,17 +17,23 @@ public final class CombinedDocIdList {
public CombinedDocIdList(long... data) {
this.data = Arrays.copyOf(data, data.length);
}
public CombinedDocIdList(LongQueryBuffer buffer) {
this.data = buffer.copyData();
}
public CombinedDocIdList(LongArrayList data) {
this.data = data.toLongArray();
}
public CombinedDocIdList(Roaring64Bitmap data) {
this.data = data.toArray();
}
public CombinedDocIdList() {
this.data = new long[0];
}
public static CombinedDocIdList combineLists(CombinedDocIdList one, CombinedDocIdList other) {
long[] data = new long[one.size() + other.size()];
System.arraycopy(one.data, 0, data, 0, one.data.length);
System.arraycopy(other.data, 0, data, one.data.length, other.data.length);
return new CombinedDocIdList(data);
}
public int size() {
return data.length;
}
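// A small usage sketch of the constructors and the combineLists() helper above; the id values
// are illustrative, and the class is assumed to be on the classpath under its new package,
// nu.marginalia.index.model.
import nu.marginalia.index.model.CombinedDocIdList;

class CombinedDocIdListSketch {
    public static void main(String[] args) {
        CombinedDocIdList first = new CombinedDocIdList(1L, 2L, 3L);
        CombinedDocIdList second = new CombinedDocIdList(4L, 5L);

        CombinedDocIdList both = CombinedDocIdList.combineLists(first, second);
        System.out.println(both.size()); // 5
    }
}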


@@ -1,14 +1,13 @@
package nu.marginalia.index.results.model.ids;
package nu.marginalia.index.model;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import java.util.Arrays;
import java.util.Objects;
import java.util.stream.LongStream;
/** A list of document ids, with their ranking bits removed.
*
* @see nu.marginalia.index.results.model.ids.CombinedDocIdList
* @see CombinedDocIdList
* @see nu.marginalia.model.id.UrlIdCodec
* */
public final class DocIdList {


@@ -1,10 +1,9 @@
package nu.marginalia.index.results.model;
package nu.marginalia.index.model;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntList;
import nu.marginalia.index.model.SearchTermsUtil;
import nu.marginalia.index.results.model.ids.TermIdList;
import nu.marginalia.language.keywords.KeywordHasher;
import nu.marginalia.sequence.CodedSequence;
import nu.marginalia.sequence.SequenceOperations;
@@ -58,9 +57,10 @@ public class PhraseConstraintGroupList {
private final int[] offsets;
private final BitSet present;
private final BitSet termIdsMask;
private final int presentCardinality;
public final int size;
public PhraseConstraintGroup(List<String> terms, TermIdList termIdsAll) {
public PhraseConstraintGroup(KeywordHasher hasher, List<String> terms, TermIdList termIdsAll) {
offsets = new int[terms.size()];
present = new BitSet(terms.size());
size = terms.size();
@@ -74,7 +74,7 @@ public class PhraseConstraintGroupList {
}
present.set(i);
long termId = SearchTermsUtil.getWordId(term);
long termId = hasher.hashKeyword(term);
int idx = termIdsAll.indexOf(termId);
if (idx < 0) {
@@ -85,6 +85,8 @@ public class PhraseConstraintGroupList {
termIdsMask.set(idx);
}
}
presentCardinality = present.cardinality();
}
/** Returns true if the term with index termIdx in the query is in the group */
@@ -93,7 +95,7 @@ public class PhraseConstraintGroupList {
}
public boolean test(CodedSequence[] positions) {
IntIterator[] sequences = new IntIterator[present.cardinality()];
IntIterator[] sequences = new IntIterator[presentCardinality];
for (int oi = 0, si = 0; oi < offsets.length; oi++) {
if (!present.get(oi)) {
@@ -120,7 +122,7 @@ public class PhraseConstraintGroupList {
public IntList findIntersections(IntList[] positions) {
IntList[] sequences = new IntList[present.cardinality()];
IntList[] sequences = new IntList[presentCardinality];
int[] iterOffsets = new int[sequences.length];
for (int oi = 0, si = 0; oi < offsets.length; oi++) {
@@ -144,12 +146,41 @@ public class PhraseConstraintGroupList {
iterOffsets[si - 1] = -oi;
}
return SequenceOperations.findIntersections(sequences, iterOffsets);
return SequenceOperations.findIntersections(sequences, iterOffsets, Integer.MAX_VALUE);
}
public IntList findIntersections(IntList[] positions, int n) {
IntList[] sequences = new IntList[presentCardinality];
int[] iterOffsets = new int[sequences.length];
for (int oi = 0, si = 0; oi < offsets.length; oi++) {
if (!present.get(oi)) {
continue;
}
int offset = offsets[oi];
if (offset < 0)
return IntList.of();
// Create iterators that are offset by their relative position in the
// sequence. This is done by subtracting the index from the offset,
// so that when we intersect them, an overlap means that the terms are
// in the correct order. Note the offset is negative!
var posForTerm = positions[offset];
if (posForTerm == null) {
return IntList.of();
}
sequences[si++] = posForTerm;
iterOffsets[si - 1] = -oi;
}
return SequenceOperations.findIntersections(sequences, iterOffsets, n);
}
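// A toy illustration of the offset trick the comments above describe: each term's position
// list is shifted left by the term's index within the phrase, so any value present in every
// shifted list marks a position where the terms appear consecutively and in order. Standalone
// sketch, outside the class; the position values are made up for the example.
class PhraseOffsetSketch {
    public static void main(String[] args) {
        int[][] positions = {
                {5, 17, 40},   // positions of term 0, e.g. "foo"
                {6, 30, 41},   // positions of term 1, e.g. "bar"
        };

        java.util.Set<Integer> candidates = new java.util.TreeSet<>();
        for (int p : positions[0]) candidates.add(p);

        for (int i = 1; i < positions.length; i++) {
            java.util.Set<Integer> shifted = new java.util.HashSet<>();
            for (int p : positions[i]) shifted.add(p - i); // apply the negative offset
            candidates.retainAll(shifted);
        }

        System.out.println(candidates); // [5, 40] -> the phrase "foo bar" starts at 5 and 40
    }
}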
public int minDistance(IntList[] positions) {
List<IntList> sequences = new ArrayList<>(present.cardinality());
IntList iterOffsets = new IntArrayList(present.cardinality());
List<IntList> sequences = new ArrayList<>(presentCardinality);
IntList iterOffsets = new IntArrayList(presentCardinality);
for (int oi = 0; oi < offsets.length; oi++) {
if (!present.get(oi)) {


@@ -1,8 +1,9 @@
package nu.marginalia.index.model;
import nu.marginalia.api.searchquery.model.query.QueryStrategy;
import nu.marginalia.api.searchquery.model.query.SpecificationLimit;
import nu.marginalia.api.searchquery.model.query.SpecificationLimitType;
import nu.marginalia.index.searchset.SearchSet;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import java.util.Objects;
@@ -41,6 +42,13 @@ public final class QueryParams {
this.queryStrategy = queryStrategy;
}
public boolean imposesDomainMetadataConstraint() {
return qualityLimit.type() != SpecificationLimitType.NONE
|| year.type() != SpecificationLimitType.NONE
|| size.type() != SpecificationLimitType.NONE
|| rank.type() != SpecificationLimitType.NONE;
}
public SpecificationLimit qualityLimit() {
return qualityLimit;
}


@@ -0,0 +1,243 @@
package nu.marginalia.index.model;
import gnu.trove.map.hash.TObjectLongHashMap;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongComparator;
import it.unimi.dsi.fastutil.longs.LongList;
import nu.marginalia.api.searchquery.IndexProtobufCodec;
import nu.marginalia.api.searchquery.RpcIndexQuery;
import nu.marginalia.api.searchquery.RpcQueryLimits;
import nu.marginalia.api.searchquery.RpcResultRankingParameters;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryParser;
import nu.marginalia.api.searchquery.model.compiled.CqDataInt;
import nu.marginalia.api.searchquery.model.query.QueryStrategy;
import nu.marginalia.api.searchquery.model.query.SearchPhraseConstraint;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.results.PrototypeRankingParameters;
import nu.marginalia.index.CombinedIndexReader;
import nu.marginalia.index.reverse.IndexLanguageContext;
import nu.marginalia.index.reverse.query.IndexSearchBudget;
import nu.marginalia.index.searchset.SearchSet;
import nu.marginalia.language.keywords.KeywordHasher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import static nu.marginalia.api.searchquery.IndexProtobufCodec.convertSpecLimit;
public class SearchContext {
private static final Logger logger = LoggerFactory.getLogger(SearchContext.class);
public final IndexSearchBudget budget;
public final int fetchSize;
public final int limitByDomain;
public final int limitTotal;
private final int docCount;
public final RpcResultRankingParameters params;
public final SearchQuery searchQuery;
public final QueryParams queryParams;
public final CompiledQuery<String> compiledQuery;
public final CompiledQueryLong compiledQueryIds;
/** Bitmask whose position correspond to the positions in the compiled query data
* which are regular words.
*/
public final BitSet regularMask;
/** Bitmask whose position correspond to the positions in the compiled query data
* which are ngrams.
*/
public final BitSet ngramsMask;
/** CqDataInt associated with frequency information of the terms in the query
* in the full index. The dataset is indexed by the compiled query. */
public final CqDataInt fullCounts;
/** CqDataInt associated with frequency information of the terms in the query
* in the priority index. The dataset is indexed by the compiled query. */
public final CqDataInt priorityCounts;
public final TermIdList termIdsAll;
public final PhraseConstraintGroupList phraseConstraints;
public final LongList termIdsAdvice;
public final LongList termIdsExcludes;
public final LongList termIdsPriority;
public final IndexLanguageContext languageContext;
public static SearchContext create(CombinedIndexReader currentIndex,
KeywordHasher keywordHasher,
SearchSpecification specsSet,
SearchSet searchSet) {
var queryParams = new QueryParams(specsSet.quality, specsSet.year, specsSet.size, specsSet.rank, searchSet, specsSet.queryStrategy);
var rankingParams = specsSet.rankingParams;
var limits = specsSet.queryLimits;
return new SearchContext(
keywordHasher,
"en", // FIXME: This path currently only supports english
currentIndex,
specsSet.query.compiledQuery,
queryParams,
specsSet.query,
rankingParams,
limits);
}
public static SearchContext create(CombinedIndexReader currentIndex,
KeywordHasher keywordHasher,
RpcIndexQuery request, SearchSet searchSet) {
var limits = request.getQueryLimits();
var query = IndexProtobufCodec.convertRpcQuery(request.getQuery());
var queryParams = new QueryParams(
convertSpecLimit(request.getQuality()),
convertSpecLimit(request.getYear()),
convertSpecLimit(request.getSize()),
convertSpecLimit(request.getRank()),
searchSet,
QueryStrategy.valueOf(request.getQueryStrategy()));
var rankingParams = request.hasParameters() ? request.getParameters() : PrototypeRankingParameters.sensibleDefaults();
return new SearchContext(
keywordHasher,
request.getLangIsoCode(),
currentIndex,
query.compiledQuery,
queryParams,
query,
rankingParams,
limits);
}
public SearchContext(
KeywordHasher keywordHasher,
String langIsoCode,
CombinedIndexReader currentIndex,
String queryExpression,
QueryParams queryParams,
SearchQuery query,
RpcResultRankingParameters rankingParams,
RpcQueryLimits limits)
{
this.docCount = currentIndex.totalDocCount();
this.languageContext = currentIndex.createLanguageContext(langIsoCode);
this.budget = new IndexSearchBudget(Math.max(limits.getTimeoutMs()/2, limits.getTimeoutMs()-50));
this.searchQuery = query;
this.params = rankingParams;
this.queryParams = queryParams;
this.fetchSize = limits.getFetchSize();
this.limitByDomain = limits.getResultsByDomain();
this.limitTotal = limits.getResultsTotal();
this.compiledQuery = CompiledQueryParser.parse(queryExpression);
this.compiledQueryIds = compiledQuery.mapToLong(keywordHasher::hashKeyword);
int[] full = new int[compiledQueryIds.size()];
int[] prio = new int[compiledQueryIds.size()];
this.ngramsMask = new BitSet(compiledQuery.size());
this.regularMask = new BitSet(compiledQuery.size());
for (int idx = 0; idx < compiledQueryIds.size(); idx++) {
long id = compiledQueryIds.at(idx);
full[idx] = currentIndex.numHits(this.languageContext, id);
prio[idx] = currentIndex.numHitsPrio(this.languageContext, id);
if (compiledQuery.at(idx).contains("_")) {
ngramsMask.set(idx);
}
else {
regularMask.set(idx);
}
}
this.fullCounts = new CqDataInt(full);
this.priorityCounts = new CqDataInt(prio);
this.termIdsExcludes = new LongArrayList();
this.termIdsPriority = new LongArrayList();
this.termIdsAdvice = new LongArrayList();
for (var word : searchQuery.searchTermsAdvice) {
termIdsAdvice.add(keywordHasher.hashKeyword(word));
}
for (var word : searchQuery.searchTermsExclude) {
termIdsExcludes.add(keywordHasher.hashKeyword(word));
}
for (var word : searchQuery.searchTermsPriority) {
termIdsPriority.add(keywordHasher.hashKeyword(word));
}
LongArrayList termIdsList = new LongArrayList();
TObjectLongHashMap<Object> termToId = new TObjectLongHashMap<>();
for (String word : compiledQuery) {
long id = keywordHasher.hashKeyword(word);
termIdsList.add(id);
termToId.put(word, id);
}
for (var term : searchQuery.searchTermsPriority) {
if (termToId.containsKey(term)) {
continue;
}
long id = keywordHasher.hashKeyword(term);
termIdsList.add(id);
termToId.put(term, id);
}
termIdsAll = new TermIdList(termIdsList);
var constraintsMandatory = new ArrayList<PhraseConstraintGroupList.PhraseConstraintGroup>();
var constraintsFull = new ArrayList<PhraseConstraintGroupList.PhraseConstraintGroup>();
var constraintsOptional = new ArrayList<PhraseConstraintGroupList.PhraseConstraintGroup>();
for (var constraint : searchQuery.phraseConstraints) {
switch (constraint) {
case SearchPhraseConstraint.Mandatory(List<String> terms) ->
constraintsMandatory.add(new PhraseConstraintGroupList.PhraseConstraintGroup(keywordHasher, terms, termIdsAll));
case SearchPhraseConstraint.Optional(List<String> terms) ->
constraintsOptional.add(new PhraseConstraintGroupList.PhraseConstraintGroup(keywordHasher, terms, termIdsAll));
case SearchPhraseConstraint.Full(List<String> terms) ->
constraintsFull.add(new PhraseConstraintGroupList.PhraseConstraintGroup(keywordHasher, terms, termIdsAll));
}
}
if (constraintsFull.isEmpty()) {
logger.warn("No full constraints in query, adding empty group");
constraintsFull.add(new PhraseConstraintGroupList.PhraseConstraintGroup(keywordHasher, List.of(), termIdsAll));
}
this.phraseConstraints = new PhraseConstraintGroupList(constraintsFull.getFirst(), constraintsMandatory, constraintsOptional);
}
public int termFreqDocCount() {
return docCount;
}
public long[] sortedDistinctIncludes(LongComparator comparator) {
LongList list = new LongArrayList(compiledQueryIds.copyData());
list.sort(comparator);
return list.toLongArray();
}
}
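// The term ids used throughout the context above are simply 64-bit hashes of the keyword
// strings, produced by the injected KeywordHasher (the now-removed SearchTermsUtil further
// down hard-wired MurmurHash3_128 for this). A minimal sketch of that hashing step; the
// keyword is an illustrative value.
import nu.marginalia.hash.MurmurHash3_128;

class TermIdSketch {
    public static void main(String[] args) {
        MurmurHash3_128 hasher = new MurmurHash3_128();

        long termId = hasher.hashKeyword("marginalia");
        System.out.println(Long.toHexString(termId));
    }
}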


@@ -1,97 +0,0 @@
package nu.marginalia.index.model;
import nu.marginalia.api.searchquery.IndexProtobufCodec;
import nu.marginalia.api.searchquery.RpcIndexQuery;
import nu.marginalia.api.searchquery.RpcResultRankingParameters;
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryParser;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.results.PrototypeRankingParameters;
import nu.marginalia.index.query.IndexSearchBudget;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.searchset.SearchSet;
import static nu.marginalia.api.searchquery.IndexProtobufCodec.convertSpecLimit;
public class SearchParameters {
/**
* This is how many results matching the keywords we'll try to get
* before evaluating them for the best result.
*/
public final int fetchSize;
public final IndexSearchBudget budget;
public final SearchQuery query;
public final QueryParams queryParams;
public final RpcResultRankingParameters rankingParams;
public final int limitByDomain;
public final int limitTotal;
public final CompiledQuery<String> compiledQuery;
public final CompiledQueryLong compiledQueryIds;
// mutable:
/**
* An estimate of how much data has been read
*/
public long dataCost = 0;
public SearchParameters(SearchSpecification specsSet, SearchSet searchSet) {
var limits = specsSet.queryLimits;
this.fetchSize = limits.getFetchSize();
this.budget = new IndexSearchBudget(limits.getTimeoutMs());
this.query = specsSet.query;
this.limitByDomain = limits.getResultsByDomain();
this.limitTotal = limits.getResultsTotal();
queryParams = new QueryParams(
specsSet.quality,
specsSet.year,
specsSet.size,
specsSet.rank,
searchSet,
specsSet.queryStrategy);
compiledQuery = CompiledQueryParser.parse(this.query.compiledQuery);
compiledQueryIds = compiledQuery.mapToLong(SearchTermsUtil::getWordId);
rankingParams = specsSet.rankingParams;
}
public SearchParameters(RpcIndexQuery request, SearchSet searchSet) {
var limits = request.getQueryLimits();
this.fetchSize = limits.getFetchSize();
// The time budget is halved because this is the point when we start to
// wrap up the search and return the results.
this.budget = new IndexSearchBudget(limits.getTimeoutMs() / 2);
this.query = IndexProtobufCodec.convertRpcQuery(request.getQuery());
this.limitByDomain = limits.getResultsByDomain();
this.limitTotal = limits.getResultsTotal();
queryParams = new QueryParams(
convertSpecLimit(request.getQuality()),
convertSpecLimit(request.getYear()),
convertSpecLimit(request.getSize()),
convertSpecLimit(request.getRank()),
searchSet,
QueryStrategy.valueOf(request.getQueryStrategy()));
compiledQuery = CompiledQueryParser.parse(this.query.compiledQuery);
compiledQueryIds = compiledQuery.mapToLong(SearchTermsUtil::getWordId);
rankingParams = request.hasParameters() ? request.getParameters() : PrototypeRankingParameters.sensibleDefaults();
}
public long getDataCost() {
return dataCost;
}
}


@@ -1,72 +0,0 @@
package nu.marginalia.index.model;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongArraySet;
import it.unimi.dsi.fastutil.longs.LongComparator;
import it.unimi.dsi.fastutil.longs.LongList;
import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import static nu.marginalia.index.model.SearchTermsUtil.getWordId;
public final class SearchTerms {
private final LongList advice;
private final LongList excludes;
private final LongList priority;
public static final LongArraySet stopWords = new LongArraySet(
new long[] {
getWordId("a"),
getWordId("an"),
getWordId("the"),
}
);
private final CompiledQueryLong compiledQueryIds;
public SearchTerms(SearchQuery query,
CompiledQueryLong compiledQueryIds)
{
this.excludes = new LongArrayList();
this.priority = new LongArrayList();
this.advice = new LongArrayList();
this.compiledQueryIds = compiledQueryIds;
for (var word : query.searchTermsAdvice) {
advice.add(getWordId(word));
}
for (var word : query.searchTermsExclude) {
excludes.add(getWordId(word));
}
for (var word : query.searchTermsPriority) {
priority.add(getWordId(word));
}
}
public boolean isEmpty() {
return compiledQueryIds.isEmpty();
}
public long[] sortedDistinctIncludes(LongComparator comparator) {
LongList list = new LongArrayList(compiledQueryIds.copyData());
list.sort(comparator);
return list.toLongArray();
}
public LongList excludes() {
return excludes;
}
public LongList advice() {
return advice;
}
public LongList priority() {
return priority;
}
public CompiledQueryLong compiledQuery() { return compiledQueryIds; }
}


@@ -1,13 +0,0 @@
package nu.marginalia.index.model;
import nu.marginalia.hash.MurmurHash3_128;
public class SearchTermsUtil {
private static final MurmurHash3_128 hasher = new MurmurHash3_128();
/** Translate the word to a unique id. */
public static long getWordId(String s) {
return hasher.hashKeyword(s);
}
}


@@ -1,4 +1,4 @@
package nu.marginalia.index.results.model.ids;
package nu.marginalia.index.model;
import it.unimi.dsi.fastutil.longs.LongArrayList;
@@ -6,7 +6,7 @@ import java.util.Arrays;
import java.util.stream.LongStream;
public final class TermIdList {
private final long[] array;
public final long[] array;
public TermIdList(long[] array) {
this.array = array;
