Mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git (synced 2025-10-05 21:22:39 +02:00)

Compare commits: 104 commits, deploy-030...bc49406881
Commits in this range (SHA1):

bc49406881
90325be447
dc89587af3
7b552afd6b
73557edc67
83919e448a
6f5b75b84d
db315e2813
e9977e08b7
1df3757e5f
ca283f9684
85360e61b2
e2ccff21bc
c5b5b0c699
9a65946e22
1d2ab21e27
0610cc19ad
a676306a7f
8d68cd14fb
4773c5a52b
74bd562ae4
c9751287b0
5da24e3fc4
20a4e86eec
477a184948
8940ce99db
0ac0fa4dca
942f15ef14
f668f33d5b
6789975cd2
c3ba608776
733d2687fe
f6daac8ed0
c2eeee4a06
3b0c701df4
c6fb2db43b
9bc8fe05ae
440ffcf6f8
b07709cc72
9a6acdcbe0
23b9b0bf1b
749c8ed954
9f4b6939ca
1d08e44e8d
fc2e156e78
5e68a89e9f
d380661307
cccdf5c329
f085b4ea12
e208f7d3ba
b577085cb2
b9240476f6
8f50f86d0b
e3b7ead7a9
9a845ba604
b9381f1603
6a60127267
e8ffcfbb19
caf0850f81
62e3bb675e
4dc3e7da7a
92b09883ec
87082b4ef8
84d3f6087f
f93ba371a5
5eec27c68d
ab01576f91
054e5ccf44
4351ea5128
49cfa3a5e9
683854b23f
e880fa8945
2482dc572e
4589f11898
e43b6e610b
4772117a1f
3fc7ea521c
4372f5af03
4ad89b6c75
ad0519e031
596ece1230
07b6e1585b
cb5e2778eb
8f5ea7896c
76c398e0b1
4a94f04a8d
df72f670d4
eaa22c2f5a
7be173aeca
36685bdca7
ad04057609
eb76ae22e2
4b858ab341
c6e3c8aa3b
9128d3907c
4ef16d13d4
838a5626ec
6b426209c7
452b5731d9
c91cf49630
8503030f18
744f7d3ef7
215e12afe9
2716bce918
@@ -14,6 +14,7 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
 
 dependencies {
     implementation project(':code:libraries:array')
+    implementation project(':code:libraries:native')
     implementation project(':code:libraries:btree')
     implementation project(':code:libraries:coded-sequence')
     implementation project(':code:libraries:language-processing')
@@ -3,6 +3,7 @@ package nu.marginalia.index.forward;
 import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.LongArrayFactory;
+import nu.marginalia.ffi.LinuxSystemCalls;
 import nu.marginalia.index.forward.spans.DocumentSpans;
 import nu.marginalia.index.forward.spans.IndexSpansReader;
 import nu.marginalia.model.id.UrlIdCodec;
@@ -65,6 +66,9 @@ public class ForwardIndexReader {
         ids = loadIds(idsFile);
         data = loadData(dataFile);
 
+        LinuxSystemCalls.madviseRandom(data.getMemorySegment());
+        LinuxSystemCalls.madviseRandom(ids.getMemorySegment());
+
         spansReader = IndexSpansReader.open(spansFile);
 
         Thread.ofPlatform().start(this::createIdsMap);
@@ -76,6 +80,7 @@ public class ForwardIndexReader {
             idsMap.put(ids.get(i), i);
         }
         this.idsMap = idsMap;
+        logger.info("Forward index loaded into RAM");
    }
 
     private static LongArray loadIds(Path idsFile) throws IOException {
@@ -121,7 +126,7 @@ public class ForwardIndexReader {
             return idsMap.getOrDefault(docId, -1);
         }
 
-        long offset = ids.binarySearch(docId, 0, ids.size());
+        long offset = ids.binarySearch2(docId, 0, ids.size());
 
         if (offset >= ids.size() || offset < 0 || ids.get(offset) != docId) {
             if (getClass().desiredAssertionStatus()) {
@@ -133,22 +138,6 @@ public class ForwardIndexReader {
         return (int) offset;
     }
 
-    public DocumentSpans getDocumentSpans(Arena arena, long docId) {
-        long offset = idxForDoc(docId);
-        if (offset < 0) return new DocumentSpans();
-
-        long encodedOffset = data.get(ENTRY_SIZE * offset + SPANS_OFFSET);
-
-        try {
-            return spansReader.readSpans(arena, encodedOffset);
-        }
-        catch (IOException ex) {
-            logger.error("Failed to read spans for doc " + docId, ex);
-            return new DocumentSpans();
-        }
-    }
-
-
     public DocumentSpans[] getDocumentSpans(Arena arena, long[] docIds) {
         long[] offsets = new long[docIds.length];
         for (int i = 0; i < docIds.length; i++) {
@@ -1,44 +1,27 @@
 package nu.marginalia.index.forward.spans;
 
 import it.unimi.dsi.fastutil.ints.IntArrayList;
+import nu.marginalia.uring.UringFileReader;
 
 import java.io.IOException;
 import java.lang.foreign.Arena;
 import java.lang.foreign.MemorySegment;
 import java.lang.foreign.ValueLayout;
-import java.nio.channels.FileChannel;
-import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ForkJoinPool;
 import java.util.List;
 
 public class IndexSpansReaderPlain implements IndexSpansReader {
-    private final FileChannel[] spansFileChannels;
-    private final ForkJoinPool forkJoinPool;
+    private final UringFileReader uringReader;
 
     public IndexSpansReaderPlain(Path spansFile) throws IOException {
-        this.spansFileChannels = new FileChannel[8];
-        for (int i = 0; i < spansFileChannels.length; i++) {
-            spansFileChannels[i] = (FileChannel) Files.newByteChannel(spansFile, StandardOpenOption.READ);
-        }
-        forkJoinPool = new ForkJoinPool(spansFileChannels.length);
+        uringReader = new UringFileReader(spansFile, true);
+        uringReader.fadviseWillneed();
     }
 
     @Override
     public DocumentSpans readSpans(Arena arena, long encodedOffset) throws IOException {
-        // Decode the size and offset from the encoded offset
-        long size = SpansCodec.decodeSize(encodedOffset);
-        long offset = SpansCodec.decodeStartOffset(encodedOffset);
-
-        var ms = arena.allocate(size, 4);
-        // Allocate a buffer from the arena
-        var buffer = ms.asByteBuffer();
-        while (buffer.hasRemaining()) {
-            spansFileChannels[0].read(buffer, offset + buffer.position());
-        }
-
-        return decode(ms);
+        // for testing, slow
+        return readSpans(arena, new long[] { encodedOffset })[0];
     }
 
     public DocumentSpans decode(MemorySegment ms) {
@@ -63,60 +46,50 @@ public class IndexSpansReaderPlain implements IndexSpansReader {
 
         return ret;
     }
 
     @Override
-    public DocumentSpans[] readSpans(Arena arena, long[] encodedOffsets) throws IOException {
-        long totalSize = 0;
-        int numJobs = 0;
+    public DocumentSpans[] readSpans(Arena arena, long[] encodedOffsets) {
+        int readCnt = 0;
         for (long offset : encodedOffsets) {
             if (offset < 0)
                 continue;
-            totalSize += SpansCodec.decodeSize(offset);
-            numJobs++;
+            readCnt++;
         }
 
+        if (readCnt == 0) {
+            return new DocumentSpans[encodedOffsets.length];
+        }
+
+        long[] offsets = new long[readCnt];
+        int[] sizes = new int[readCnt];
+
+        for (int idx = 0, j = 0; idx < encodedOffsets.length; idx++) {
+            if (encodedOffsets[idx] < 0)
+                continue;
+            long offset = encodedOffsets[idx];
+
+            offsets[j] = SpansCodec.decodeStartOffset(offset);
+            sizes[j] = SpansCodec.decodeSize(offset);
+            j++;
+        }
+
+        List<MemorySegment> buffers = uringReader.readUnalignedInDirectMode(arena, offsets, sizes, 4096);
+
         DocumentSpans[] ret = new DocumentSpans[encodedOffsets.length];
-        if (numJobs == 0) return ret;
-
-        CountDownLatch latch = new CountDownLatch(numJobs);
-        MemorySegment segment = arena.allocate(totalSize, 8);
-
-        long bufferOffset = 0;
-        for (int idx = 0; idx < encodedOffsets.length; idx++) {
-            long size = SpansCodec.decodeSize(encodedOffsets[idx]);
-            long start = SpansCodec.decodeStartOffset(encodedOffsets[idx]);
-
-            MemorySegment slice = segment.asSlice(bufferOffset, size);
-            bufferOffset += size;
-
-            int i = idx;
-            forkJoinPool.execute(() -> {
-                var buffer = slice.asByteBuffer();
-                try {
-                    spansFileChannels[i % spansFileChannels.length].read(buffer, start);
-                    ret[i] = decode(slice);
-                }
-                catch (IOException ex) {
-                    throw new RuntimeException(ex);
-                }
-                finally {
-                    latch.countDown();
-                }
-            });
-        }
-        try {
-            latch.await();
-        }
-        catch (InterruptedException ex) {
-            Thread.currentThread().interrupt();
+        for (int idx = 0, j = 0; idx < encodedOffsets.length; idx++) {
+            if (encodedOffsets[idx] < 0)
+                continue;
+            ret[idx] = decode(buffers.get(j++));
         }
 
         return ret;
     }
 
     @Override
     public void close() throws IOException {
-        for (var spansFileChannel : spansFileChannels) {
-            spansFileChannel.close();
-        }
+        uringReader.close();
     }
 
 }
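UringFileReader.readUnalignedInDirectMode is not defined anywhere in this diff; from its call sites (arbitrary offset/size pairs plus a 4096-byte block size) it plausibly rounds each request out to block boundaries, performs aligned O_DIRECT reads, and returns slices covering just the requested bytes. A hedged sketch of that alignment arithmetic, using a plain FileChannel as a stand-in for the io_uring submission (blockSize assumed to be a power of two):

```java
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

class UnalignedDirectReadSketch {
    /** Round an unaligned (offset, size) request out to blockSize boundaries,
     * read the enclosing aligned range, and return a slice of just the
     * requested bytes. A real io_uring implementation would batch many of
     * these reads into a single submission. */
    static MemorySegment readUnaligned(Arena arena, FileChannel ch,
                                       long offset, int size, int blockSize) throws IOException {
        long alignedStart = offset & -(long) blockSize;                          // round down
        long alignedEnd = (offset + size + blockSize - 1) & -(long) blockSize;   // round up
        MemorySegment block = arena.allocate(alignedEnd - alignedStart, blockSize);
        ByteBuffer bb = block.asByteBuffer();
        while (bb.hasRemaining())
            ch.read(bb, alignedStart + bb.position());
        return block.asSlice(offset - alignedStart, size);
    }
}
```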
@@ -12,7 +12,7 @@ import java.nio.file.StandardOpenOption;
 
 public class IndexSpansWriter implements AutoCloseable {
     private final FileChannel outputChannel;
-    private final ByteBuffer work = ByteBuffer.allocate(65536).order(ByteOrder.nativeOrder());
+    private final ByteBuffer work = ByteBuffer.allocate(4*1024*1024).order(ByteOrder.nativeOrder());
 
     private long stateStartOffset = -1;
     private int stateLength = -1;
@@ -55,7 +55,7 @@ public class IndexSpansWriter implements AutoCloseable {
 
     @Override
     public void close() throws IOException {
-        ByteBuffer footer = SpansCodec.createSpanFilesFooter(SpansCodec.SpansCodecVersion.PLAIN);
+        ByteBuffer footer = SpansCodec.createSpanFilesFooter(SpansCodec.SpansCodecVersion.PLAIN, (int) (4096 - (outputChannel.position() & 4095)));
         outputChannel.position(outputChannel.size());
         while (footer.hasRemaining()) {
             outputChannel.write(footer, outputChannel.size());
@@ -10,7 +10,7 @@ public class SpansCodec {
     public static int MAGIC_INT = 0xF000F000;
     public static int FOOTER_SIZE = 8;
 
-    enum SpansCodecVersion {
+    public enum SpansCodecVersion {
         @Deprecated
         COMPRESSED,
         PLAIN
@@ -26,12 +26,17 @@ public class SpansCodec {
         return encoded >>> 28;
     }
 
-    public static long decodeSize(long encoded) {
-        return encoded & 0x0FFF_FFFFL;
+    public static int decodeSize(long encoded) {
+        return (int) (encoded & 0x0FFF_FFFFL);
     }
 
-    public static ByteBuffer createSpanFilesFooter(SpansCodecVersion version) {
-        ByteBuffer footer = ByteBuffer.allocate(FOOTER_SIZE);
+    public static ByteBuffer createSpanFilesFooter(SpansCodecVersion version, int padSize) {
+        if (padSize < FOOTER_SIZE) {
+            padSize += 4096;
+        }
+
+        ByteBuffer footer = ByteBuffer.allocate(padSize);
+        footer.position(padSize - FOOTER_SIZE);
         footer.putInt(SpansCodec.MAGIC_INT);
         footer.put((byte) version.ordinal());
         footer.put((byte) 0);
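From the two decoders above, the encoder presumably packs the file offset into the upper 36 bits of a long and a 28-bit size into the lower bits (capping a single span list at roughly 256 MB), and the padded footer keeps the file size a multiple of 4096 so the whole file can be read in O_DIRECT mode. A sketch of the implied round trip; the encode() shown here is an inference, not the actual SpansCodec source:

```java
// Implied by decodeStartOffset()/decodeSize(): offset in the top 36 bits,
// size in the low 28 bits.
static long encode(long startOffset, int size) {
    assert (size & ~0x0FFF_FFFF) == 0 : "size must fit in 28 bits";
    return (startOffset << 28) | size;
}

// Footer padding: (position + padSize) becomes a multiple of 4096, and the
// 8-byte footer occupies the very last bytes of the file. If the natural pad
// is too small to hold the footer, a whole extra block is added.
static int padSizeFor(long position) {
    int pad = (int) (4096 - (position & 4095));
    return pad < 8 ? pad + 4096 : pad;   // 8 == FOOTER_SIZE
}
```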
@@ -21,6 +21,7 @@ dependencies {
     implementation project(':code:common:db')
 
     implementation project(':code:libraries:array')
+    implementation project(':code:libraries:native')
     implementation project(':code:libraries:btree')
     implementation project(':code:libraries:term-frequency-dict')
     implementation project(':code:common:linkdb')
@@ -0,0 +1,262 @@
+package nu.marginalia.index.perftest;
+
+import nu.marginalia.ffi.LinuxSystemCalls;
+import nu.marginalia.uring.UringFileReader;
+
+import java.io.IOException;
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.LongStream;
+
+public class IoPatternsMain {
+
+    static void testBuffered(int sz, int small, int large, int iters) {
+        try {
+            Path largeFile = Path.of("/home/vlofgren/largefile.dat");
+            long fileSize = Files.size(largeFile);
+
+            Random r = new Random();
+            List<MemorySegment> segments = new ArrayList<>();
+            for (int i = 0; i < sz; i++) {
+                if (small == large) {
+                    segments.add(Arena.ofAuto().allocate(small));
+                }
+                else {
+                    segments.add(Arena.ofAuto().allocate(r.nextInt(small, large)));
+                }
+            }
+            List<Long> offsets = new ArrayList<>();
+
+            long[] samples = new long[1000];
+            int si = 0;
+
+            try (UringFileReader reader = new UringFileReader(largeFile, false)) {
+                for (int iter = 0; iter < iters; ) {
+                    if (si == samples.length) {
+                        Arrays.sort(samples);
+                        double p1 = samples[10] / 1_000.;
+                        double p10 = samples[100] / 1_000.;
+                        double p90 = samples[900] / 1_000.;
+                        double p99 = samples[990] / 1_000.;
+                        double avg = LongStream.of(samples).average().getAsDouble() / 1000.;
+                        System.out.println("B" + "\t" + avg + "\t" + p1 + " " + p10 + " " + p90 + " " + p99);
+                        si = 0;
+                        iter++;
+                    }
+
+                    offsets.clear();
+                    for (int i = 0; i < sz; i++) {
+                        offsets.add(r.nextLong(0, fileSize - 256));
+                    }
+
+                    long st = System.nanoTime();
+                    reader.read(segments, offsets);
+                    long et = System.nanoTime();
+
+                    samples[si++] = et - st;
+                }
+            }
+        }
+        catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    static void testBufferedPread(int sz, int iters) {
+        try {
+            Path largeFile = Path.of("/home/vlofgren/largefile.dat");
+            long fileSize = Files.size(largeFile);
+
+            Random r = new Random();
+            List<MemorySegment> segments = new ArrayList<>();
+            for (int i = 0; i < sz; i++) {
+                segments.add(Arena.ofAuto().allocate(r.nextInt(24, 256)));
+            }
+            List<Long> offsets = new ArrayList<>();
+
+            long[] samples = new long[1000];
+            int si = 0;
+
+            int fd = -1;
+            try {
+                fd = LinuxSystemCalls.openBuffered(largeFile);
+                LinuxSystemCalls.fadviseRandom(fd);
+
+                for (int iter = 0; iter < iters; ) {
+                    if (si == samples.length) {
+                        Arrays.sort(samples);
+                        double p1 = samples[10] / 1_000.;
+                        double p10 = samples[100] / 1_000.;
+                        double p90 = samples[900] / 1_000.;
+                        double p99 = samples[990] / 1_000.;
+                        double avg = LongStream.of(samples).average().getAsDouble() / 1000.;
+                        System.out.println("BP" + "\t" + avg + "\t" + p1 + " " + p10 + " " + p90 + " " + p99);
+                        si = 0;
+                        iter++;
+                    }
+
+                    offsets.clear();
+                    for (int i = 0; i < sz; i++) {
+                        offsets.add(r.nextLong(0, fileSize - 256));
+                    }
+
+                    long st = System.nanoTime();
+                    for (int i = 0; i < sz; i++) {
+                        LinuxSystemCalls.readAt(fd, segments.get(i), offsets.get(i));
+                    }
+                    long et = System.nanoTime();
+
+                    samples[si++] = et - st;
+                }
+            }
+            finally {
+                LinuxSystemCalls.closeFd(fd);
+            }
+        }
+        catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+
+    static void testDirect(int blockSize, int sz, int iters) {
+        try {
+            Path largeFile = Path.of("/home/vlofgren/largefile.dat");
+            int fileSizeBlocks = (int) ((Files.size(largeFile) & -blockSize) / blockSize);
+
+            Random r = new Random();
+            List<MemorySegment> segments = new ArrayList<>();
+            for (int i = 0; i < sz; i++) {
+                segments.add(Arena.ofAuto().allocate(blockSize, blockSize));
+            }
+            List<Long> offsets = new ArrayList<>();
+
+            long[] samples = new long[1000];
+            int si = 0;
+
+            try (UringFileReader reader = new UringFileReader(largeFile, true)) {
+                for (int iter = 0; iter < iters; ) {
+                    if (si == samples.length) {
+                        Arrays.sort(samples);
+                        double p1 = samples[10] / 1_000.;
+                        double p10 = samples[100] / 1_000.;
+                        double p90 = samples[900] / 1_000.;
+                        double p99 = samples[990] / 1_000.;
+                        double avg = LongStream.of(samples).average().getAsDouble() / 1000.;
+                        System.out.println("DN" + blockSize + "\t" + avg + "\t" + p1 + " " + p10 + " " + p90 + " " + p99);
+                        si = 0;
+                        iter++;
+                    }
+
+                    offsets.clear();
+                    for (int i = 0; i < sz; i++) {
+                        offsets.add(blockSize * r.nextLong(0, fileSizeBlocks));
+                    }
+
+                    long st = System.nanoTime();
+                    reader.read(segments, offsets);
+                    long et = System.nanoTime();
+
+                    samples[si++] = et - st;
+                }
+            }
+        }
+        catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+
+    static void testDirect1(int blockSize, int iters) {
+        try {
+            Path largeFile = Path.of("/home/vlofgren/largefile.dat");
+            int fileSizeBlocks = (int) ((Files.size(largeFile) & -blockSize) / blockSize);
+
+            Random r = new Random();
+            MemorySegment segment = Arena.global().allocate(blockSize, blockSize);
+
+            long[] samples = new long[1000];
+            int si = 0;
+
+            int fd = LinuxSystemCalls.openDirect(largeFile);
+            if (fd < 0) {
+                throw new IOException("open failed");
+            }
+            try {
+                for (int iter = 0; iter < iters; ) {
+                    if (si == samples.length) {
+                        Arrays.sort(samples);
+                        double p1 = samples[10] / 1_000.;
+                        double p10 = samples[100] / 1_000.;
+                        double p90 = samples[900] / 1_000.;
+                        double p99 = samples[990] / 1_000.;
+                        double avg = LongStream.of(samples).average().getAsDouble() / 1000.;
+                        System.out.println("D1" + blockSize + "\t" + avg + "\t" + p1 + " " + p10 + " " + p90 + " " + p99);
+                        si = 0;
+                        iter++;
+                    }
+
+                    long st = System.nanoTime();
+                    int ret;
+                    long readOffset = blockSize * r.nextLong(0, fileSizeBlocks);
+                    if (blockSize != (ret = LinuxSystemCalls.readAt(fd, segment, readOffset))) {
+                        throw new IOException("pread failed: " + ret);
+                    }
+                    long et = System.nanoTime();
+
+                    samples[si++] = et - st;
+                }
+            }
+            finally {
+                LinuxSystemCalls.closeFd(fd);
+            }
+        }
+        catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+//        Thread.ofPlatform().start(() -> testBuffered(128, 32, 65536, 1000));
+        Thread.ofPlatform().start(() -> testDirect(8192*4, 128, 1000));
+//        Thread.ofPlatform().start(() -> testBuffered(128, 1000));
+//        Thread.ofPlatform().start(() -> testBuffered(128, 1000));
+//        Thread.ofPlatform().start(() -> testBuffered(128, 1000));
+//        Thread.ofPlatform().start(() -> testBufferedPread(128, 1000));
+
+//        Thread.ofPlatform().start(() -> testDirect1(1024, 1000));
+//        Thread.ofPlatform().start(() -> testDirect1(1024, 1000));
+//        Thread.ofPlatform().start(() -> testDirect1(1024, 1000));
+//        Thread.ofPlatform().start(() -> testDirect1(1024*1024, 1000));
+//        Thread.ofPlatform().start(() -> testDirect1(1024*1024, 1000));
+//        Thread.ofPlatform().start(() -> testDirect(512, 512, 1000));
+//        Thread.ofPlatform().start(() -> testDirect(512, 512, 1000));
+//        Thread.ofPlatform().start(() -> testDirect(512, 512, 1000));
+//        Thread.ofPlatform().start(() -> testDirect(512, 100));
+//        Thread.ofPlatform().start(() -> testDirect(512, 100));
+//        Thread.ofPlatform().start(() -> testDirect(512, 100));
+//        Thread.ofPlatform().start(() -> testDirect(512, 100));
+//        Thread.ofPlatform().start(() -> testBuffered(512, 1000));
+//        Thread.ofPlatform().start(() -> testBuffered(512, 1000));
+//        Thread.ofPlatform().start(() -> testBuffered(512, 1000));
+//        Thread.ofPlatform().start(() -> testBuffered(512, 1000));
+//        Thread.ofPlatform().start(() -> testBuffered(100));
+//        Thread.ofPlatform().start(() -> testBuffered(100));
+
+        for (;;);
+//        testBuffered(100);
+    }
+}
@@ -20,6 +20,7 @@ import nu.marginalia.index.model.SearchParameters;
 import nu.marginalia.index.model.SearchTerms;
 import nu.marginalia.index.positions.PositionsFileReader;
 import nu.marginalia.index.query.IndexQuery;
+import nu.marginalia.index.query.IndexSearchBudget;
 import nu.marginalia.index.results.DomainRankingOverrides;
 import nu.marginalia.index.results.IndexResultRankingService;
 import nu.marginalia.index.results.model.ids.CombinedDocIdList;
@@ -38,6 +39,7 @@ import java.time.Instant;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.concurrent.TimeoutException;
 
 public class PerfTestMain {
     static Duration warmupTime = Duration.ofMinutes(1);
@@ -64,6 +66,8 @@ public class PerfTestMain {
                 case "lookup" -> runLookup(indexDir, homeDir, query);
                 case "execution" -> runExecution(indexDir, homeDir, query);
             }
+
+            System.exit(0);
         }
         catch (NumberFormatException e) {
             System.err.println("Arguments: data-dir index-dir query");
@@ -118,8 +122,7 @@ public class PerfTestMain {
 
     public static void runValuation(Path homeDir,
                                     Path indexDir,
-                                    String rawQuery) throws IOException, SQLException
-    {
+                                    String rawQuery) throws IOException, SQLException, TimeoutException {
 
         CombinedIndexReader indexReader = createCombinedIndexReader(indexDir);
         QueryFactory queryFactory = createQueryFactory(homeDir);
@@ -137,48 +140,39 @@ public class PerfTestMain {
 
         SearchParameters searchParameters = new SearchParameters(parsedQuery, new SearchSetAny());
 
-        List<IndexQuery> queries = indexReader.createQueries(new SearchTerms(searchParameters.query, searchParameters.compiledQueryIds), searchParameters.queryParams);
+        List<IndexQuery> queries = indexReader.createQueries(new SearchTerms(searchParameters.query, searchParameters.compiledQueryIds), searchParameters.queryParams, new IndexSearchBudget(10_000));
 
         TLongArrayList allResults = new TLongArrayList();
-        LongQueryBuffer buffer = new LongQueryBuffer(4096);
+        LongQueryBuffer buffer = new LongQueryBuffer(512);
 
         for (var query : queries) {
-            while (query.hasMore() && allResults.size() < 4096 ) {
+            while (query.hasMore() && allResults.size() < 512 ) {
                 query.getMoreResults(buffer);
                 allResults.addAll(buffer.copyData());
             }
-            if (allResults.size() >= 4096)
+            if (allResults.size() >= 512)
                 break;
         }
         allResults.sort();
-        if (allResults.size() > 4096) {
-            allResults.subList(4096, allResults.size()).clear();
+        if (allResults.size() > 512) {
+            allResults.subList(512, allResults.size()).clear();
         }
 
-        var docIds = new CombinedDocIdList(allResults.toArray());
         var rankingContext = ResultRankingContext.create(indexReader, searchParameters);
+        var rankingData = rankingService.prepareRankingData(rankingContext, new CombinedDocIdList(allResults.toArray()), null);
 
-        System.out.println("Running warmup loop!");
-        int sum = 0;
-
-        Instant runEndTime = Instant.now().plus(warmupTime);
-
-        int iter;
-        for (iter = 0;; iter++) {
-            sum += rankingService.rankResults(rankingContext, docIds, false).size();
-            if ((iter % 100) == 0 && Instant.now().isAfter(runEndTime)) {
-                break;
-            }
-        }
-        System.out.println("Warmup complete after " + iter + " iters!");
-
-        runEndTime = Instant.now().plus(runTime);
+        Instant runEndTime = Instant.now().plus(runTime);
         Instant runStartTime = Instant.now();
         int sum2 = 0;
         List<Double> times = new ArrayList<>();
+
+        int iter;
         for (iter = 0;; iter++) {
+            IndexSearchBudget budget = new IndexSearchBudget(10000);
             long start = System.nanoTime();
-            sum2 += rankingService.rankResults(rankingContext, docIds, false).size();
+            sum2 += rankingService.rankResults(budget, rankingContext, rankingData, false).size();
             long end = System.nanoTime();
             times.add((end - start)/1_000_000.);
 
@@ -186,14 +180,19 @@ public class PerfTestMain {
             if (Instant.now().isAfter(runEndTime)) {
                 break;
             }
-            System.out.println(Duration.between(runStartTime, Instant.now()).toMillis() / 1000. + " best times: " + (allResults.size() / 4096.) * times.stream().mapToDouble(Double::doubleValue).sorted().limit(3).average().orElse(-1));
+            if (times.size() > 100) {
+                double[] timesSample = times.stream().mapToDouble(Double::doubleValue).skip(times.size() - 100).sorted().toArray();
+                System.out.format("P1: %f P10: %f, P90: %f, P99: %f\n", timesSample[1], timesSample[10], timesSample[90], timesSample[99]);
+            }
+            System.out.println(Duration.between(runStartTime, Instant.now()).toMillis() / 1000. + " best times: " + (allResults.size() / 512.) * times.stream().mapToDouble(Double::doubleValue).sorted().limit(3).average().orElse(-1));
         }
     }
     System.out.println("Benchmark complete after " + iter + " iters!");
-        System.out.println("Best times: " + (allResults.size() / 4096.) * times.stream().mapToDouble(Double::doubleValue).sorted().limit(3).average().orElse(-1));
-
+        System.out.println("Best times: " + (allResults.size() / 512.) * times.stream().mapToDouble(Double::doubleValue).sorted().limit(3).average().orElse(-1));
        System.out.println("Warmup sum: " + sum);
         System.out.println("Main sum: " + sum2);
-        System.out.println(docIds.size());
+        System.out.println(rankingData.size());
     }
 
     public static void runExecution(Path homeDir,
@@ -216,24 +215,12 @@ public class PerfTestMain {
         System.out.println("Running warmup loop!");
         int sum = 0;
 
-        Instant runEndTime = Instant.now().plus(warmupTime);
-
-        int iter;
-        for (iter = 0;; iter++) {
-            SearchParameters searchParameters = new SearchParameters(parsedQuery, new SearchSetAny());
-            var execution = new IndexQueryExecution(searchParameters, rankingService, indexReader);
-            execution.run();
-            sum += execution.itemsProcessed();
-            if ((iter % 100) == 0 && Instant.now().isAfter(runEndTime)) {
-                break;
-            }
-        }
-        System.out.println("Warmup complete after " + iter + " iters!");
-
-        runEndTime = Instant.now().plus(runTime);
+        Instant runEndTime = Instant.now().plus(runTime);
         Instant runStartTime = Instant.now();
         int sum2 = 0;
         List<Double> rates = new ArrayList<>();
+        List<Double> times = new ArrayList<>();
+        int iter;
         for (iter = 0;; iter++) {
             SearchParameters searchParameters = new SearchParameters(parsedQuery, new SearchSetAny());
             var execution = new IndexQueryExecution(searchParameters, rankingService, indexReader);
@@ -242,14 +229,20 @@ public class PerfTestMain {
             long end = System.nanoTime();
             sum2 += execution.itemsProcessed();
             rates.add(execution.itemsProcessed() / ((end - start)/1_000_000_000.));
+            times.add((end - start)/1_000_000.);
+            indexReader.reset();
             if ((iter % 100) == 0) {
                 if (Instant.now().isAfter(runEndTime)) {
                     break;
                 }
+                if (times.size() > 100) {
+                    double[] timesSample = times.stream().mapToDouble(Double::doubleValue).skip(times.size() - 100).sorted().toArray();
+                    System.out.format("P1: %f P10: %f, P90: %f, P99: %f\n", timesSample[1], timesSample[10], timesSample[90], timesSample[99]);
+                }
                 System.out.println(Duration.between(runStartTime, Instant.now()).toMillis() / 1000. + " best rates: " + rates.stream().mapToDouble(Double::doubleValue).map(i -> -i).sorted().map(i -> -i).limit(3).average().orElse(-1));
             }
         }
 
         System.out.println("Benchmark complete after " + iter + " iters!");
         System.out.println("Best counts: " + rates.stream().mapToDouble(Double::doubleValue).map(i -> -i).sorted().map(i -> -i).limit(3).average().orElse(-1));
         System.out.println("Warmup sum: " + sum);
@@ -277,35 +270,18 @@ public class PerfTestMain {
         SearchParameters searchParameters = new SearchParameters(parsedQuery, new SearchSetAny());
 
 
-        Instant runEndTime = Instant.now().plus(warmupTime);
+        Instant runEndTime = Instant.now().plus(runTime);
 
-        LongQueryBuffer buffer = new LongQueryBuffer(4096);
-        int sum1 = 0;
+        LongQueryBuffer buffer = new LongQueryBuffer(512);
         int iter;
-        for (iter = 0;; iter++) {
-            List<IndexQuery> queries = indexReader.createQueries(new SearchTerms(searchParameters.query, searchParameters.compiledQueryIds), searchParameters.queryParams);
-
-            for (var query : queries) {
-                while (query.hasMore()) {
-                    query.getMoreResults(buffer);
-                    sum1 += buffer.end;
-                    buffer.reset();
-                }
-            }
-
-            if ((iter % 100) == 0 && Instant.now().isAfter(runEndTime)) {
-                break;
-            }
-        }
-
-        System.out.println("Warmup complete after " + iter + " iters with sum1 = " + sum1);
-
-        runEndTime = Instant.now().plus(runTime);
         Instant runStartTime = Instant.now();
         int sum2 = 0;
         List<Double> times = new ArrayList<>();
         for (iter = 0;; iter++) {
-            List<IndexQuery> queries = indexReader.createQueries(new SearchTerms(searchParameters.query, searchParameters.compiledQueryIds), searchParameters.queryParams);
+            indexReader.reset();
+            List<IndexQuery> queries = indexReader.createQueries(new SearchTerms(searchParameters.query, searchParameters.compiledQueryIds), searchParameters.queryParams, new IndexSearchBudget(150));
 
             long start = System.nanoTime();
             for (var query : queries) {
@@ -316,12 +292,16 @@ public class PerfTestMain {
                }
             }
             long end = System.nanoTime();
-            times.add((end - start)/1_000_000.);
+            times.add((end - start)/1_000_000_000.);
 
-            if ((iter % 100) == 0) {
+            if ((iter % 10) == 0) {
                 if (Instant.now().isAfter(runEndTime)) {
                     break;
                 }
+                if (times.size() > 100) {
+                    double[] timesSample = times.stream().mapToDouble(Double::doubleValue).skip(times.size() - 100).sorted().toArray();
+                    System.out.format("P1: %f P10: %f, P90: %f, P99: %f\n", timesSample[1], timesSample[10], timesSample[90], timesSample[99]);
+                }
                 System.out.println(Duration.between(runStartTime, Instant.now()).toMillis() / 1000. + " best times: " + times.stream().mapToDouble(Double::doubleValue).sorted().limit(3).average().orElse(-1));
             }
         }
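The recurring pattern in these PerfTestMain hunks is that query construction and filtering now take an IndexSearchBudget: a wall-clock allowance in milliseconds, judging by new IndexSearchBudget(10_000) here and the hasTimeLeft() checks in the filter classes later in this diff. A hedged sketch of the usage pattern, with assumed semantics:

```java
// Sketch only: assumes IndexSearchBudget wraps a millisecond deadline and
// exposes hasTimeLeft(), as the filter changes later in this diff suggest.
IndexSearchBudget budget = new IndexSearchBudget(150);
List<IndexQuery> queries = indexReader.createQueries(terms, params, budget);

LongQueryBuffer buffer = new LongQueryBuffer(512);
for (IndexQuery query : queries) {
    while (budget.hasTimeLeft() && query.hasMore()) {
        query.getMoreResults(buffer);
        // ... consume buffer ...
        buffer.reset();
    }
}
```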
@@ -15,6 +15,7 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
 
 dependencies {
     implementation project(':code:libraries:array')
+    implementation project(':code:libraries:native')
     implementation project(':code:libraries:btree')
     implementation project(':code:libraries:coded-sequence')
     implementation project(':code:libraries:random-write-funnel')
@@ -1,32 +1,26 @@
 package nu.marginalia.index;
 
 import nu.marginalia.array.page.LongQueryBuffer;
-import nu.marginalia.btree.BTreeReader;
 import nu.marginalia.index.query.EntrySource;
-
-import static java.lang.Math.min;
+import nu.marginalia.skiplist.SkipListReader;
 
 public class FullIndexEntrySource implements EntrySource {
     private final String name;
-    private final BTreeReader reader;
-
-    int pos;
-    int endOffset;
-
-    final int entrySize;
+    private final SkipListReader reader;
     private final long wordId;
 
     public FullIndexEntrySource(String name,
-                                BTreeReader reader,
-                                int entrySize,
+                                SkipListReader reader,
                                 long wordId) {
         this.name = name;
         this.reader = reader;
-        this.entrySize = entrySize;
         this.wordId = wordId;
-
-        pos = 0;
-        endOffset = pos + entrySize * reader.numEntries();
     }
 
     @Override
@@ -36,32 +30,14 @@ public class FullIndexEntrySource implements EntrySource {
 
     @Override
     public void read(LongQueryBuffer buffer) {
         buffer.reset();
-        buffer.end = min(buffer.end, endOffset - pos);
-        reader.readData(buffer.data, buffer.end, pos);
-        pos += buffer.end;
-
-        destagger(buffer);
-        buffer.uniq();
-    }
-
-    private void destagger(LongQueryBuffer buffer) {
-        if (entrySize == 1)
-            return;
-
-        for (int ri = entrySize, wi = 1; ri < buffer.end; ri += entrySize, wi++) {
-            buffer.data.set(wi, buffer.data.get(ri));
-        }
-
-        buffer.end /= entrySize;
+        reader.getData(buffer);
     }
 
     @Override
     public boolean hasMore() {
-        return pos < endOffset;
+        return !reader.atEnd();
     }
 
 
     @Override
     public String indexName() {
         return name + ":" + Long.toHexString(wordId);
@@ -2,16 +2,17 @@ package nu.marginalia.index;
 
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.LongArrayFactory;
+import nu.marginalia.array.pool.BufferPool;
 import nu.marginalia.btree.BTreeReader;
+import nu.marginalia.ffi.LinuxSystemCalls;
 import nu.marginalia.index.positions.PositionsFileReader;
 import nu.marginalia.index.positions.TermData;
 import nu.marginalia.index.query.EmptyEntrySource;
 import nu.marginalia.index.query.EntrySource;
-import nu.marginalia.index.query.ReverseIndexRejectFilter;
-import nu.marginalia.index.query.ReverseIndexRetainFilter;
+import nu.marginalia.index.query.*;
 import nu.marginalia.index.query.filter.QueryFilterLetThrough;
 import nu.marginalia.index.query.filter.QueryFilterNoPass;
 import nu.marginalia.index.query.filter.QueryFilterStepIf;
+import nu.marginalia.skiplist.SkipListConstants;
+import nu.marginalia.skiplist.SkipListReader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -20,10 +21,12 @@ import java.lang.foreign.Arena;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.concurrent.Executors;
+import java.util.function.Consumer;
 
 public class FullReverseIndexReader {
     private final LongArray words;
     private final LongArray documents;
 
     private final long wordsDataOffset;
     private final Logger logger = LoggerFactory.getLogger(getClass());
     private final BTreeReader wordsBTreeReader;
@@ -31,6 +34,8 @@ public class FullReverseIndexReader {
 
     private final PositionsFileReader positionsFileReader;
 
+    private final BufferPool dataPool;
+
     public FullReverseIndexReader(String name,
                                   Path words,
                                   Path documents,
@@ -44,6 +49,7 @@ public class FullReverseIndexReader {
             this.documents = null;
             this.wordsBTreeReader = null;
             this.wordsDataOffset = -1;
+            this.dataPool = null;
             return;
         }
 
@@ -52,6 +58,11 @@ public class FullReverseIndexReader {
         this.words = LongArrayFactory.mmapForReadingShared(words);
         this.documents = LongArrayFactory.mmapForReadingShared(documents);
 
+        LinuxSystemCalls.madviseRandom(this.words.getMemorySegment());
+        LinuxSystemCalls.madviseRandom(this.documents.getMemorySegment());
+
+        dataPool = new BufferPool(documents, SkipListConstants.BLOCK_SIZE, (int) (Long.getLong("index.bufferPoolSize", 512*1024*1024L) / SkipListConstants.BLOCK_SIZE));
+
         wordsBTreeReader = new BTreeReader(this.words, ReverseIndexParameters.wordsBTreeContext, 0);
         wordsDataOffset = wordsBTreeReader.getHeader().dataOffsetLongs();
 
@@ -62,6 +73,11 @@ public class FullReverseIndexReader {
         }
     }
 
+    public void reset() {
+        dataPool.reset();
+    }
+
+
     private void selfTest() {
         logger.info("Running self test program");
 
@@ -76,6 +92,15 @@ public class FullReverseIndexReader {
         ReverseIndexSelfTest.runSelfTest6(wordsDataRange, documents);
     }
 
+    public void eachDocRange(Consumer<LongArray> eachDocRange) {
+        long wordsDataSize = wordsBTreeReader.getHeader().numEntries() * 2L;
+        var wordsDataRange = words.range(wordsDataOffset, wordsDataOffset + wordsDataSize);
+
+        for (long i = 1; i < wordsDataRange.size(); i += 2) {
+            var docsBTreeReader = new BTreeReader(documents, ReverseIndexParameters.fullDocsBTreeContext, wordsDataRange.get(i));
+            eachDocRange.accept(docsBTreeReader.data());
+        }
+    }
+
     /** Calculate the offset of the word in the documents.
      * If the return-value is negative, the term does not exist
@@ -101,27 +126,27 @@ public class FullReverseIndexReader {
         if (offset < 0) // No documents
             return new EmptyEntrySource();
 
-        return new FullIndexEntrySource(name, createReaderNew(offset), 2, termId);
+        return new FullIndexEntrySource(name, getReader(offset), termId);
     }
 
     /** Create a filter step requiring the specified termId to exist in the documents */
-    public QueryFilterStepIf also(long termId) {
+    public QueryFilterStepIf also(long termId, IndexSearchBudget budget) {
         long offset = wordOffset(termId);
 
         if (offset < 0) // No documents
             return new QueryFilterNoPass();
 
-        return new ReverseIndexRetainFilter(createReaderNew(offset), name, termId);
+        return new ReverseIndexRetainFilter(getReader(offset), name, termId, budget);
     }
 
     /** Create a filter step requiring the specified termId to be absent from the documents */
-    public QueryFilterStepIf not(long termId) {
+    public QueryFilterStepIf not(long termId, IndexSearchBudget budget) {
         long offset = wordOffset(termId);
 
         if (offset < 0) // No documents
             return new QueryFilterLetThrough();
 
-        return new ReverseIndexRejectFilter(createReaderNew(offset));
+        return new ReverseIndexRejectFilter(getReader(offset), budget);
     }
 
     /** Return the number of documents with the termId in the index */
@@ -131,15 +156,39 @@ public class FullReverseIndexReader {
         if (offset < 0)
             return 0;
 
-        return createReaderNew(offset).numEntries();
+        return getReader(offset).estimateSize();
     }
 
-    /** Create a BTreeReader for the document offset associated with a termId */
-    private BTreeReader createReaderNew(long offset) {
-        return new BTreeReader(
-                documents,
-                ReverseIndexParameters.fullDocsBTreeContext,
-                offset);
+    private SkipListReader getReader(long offset) {
+        return new SkipListReader(dataPool, offset);
     }
 
+    public TermData[] getTermData(Arena arena,
+                                  long[] termIds,
+                                  long[] docIds)
+    {
+        long[] offsetsAll = new long[termIds.length * docIds.length];
+
+        for (int i = 0; i < termIds.length; i++) {
+            long termId = termIds[i];
+            long offset = wordOffset(termId);
+
+            if (offset < 0) {
+                // This is likely a bug in the code, but we can't throw an exception here
+                logger.debug("Missing offset for word {}", termId);
+                continue;
+            }
+
+            var reader = getReader(offset);
+
+            // Read the size and offset of the position data
+            var offsetsForTerm = reader.getValueOffsets(docIds);
+            System.arraycopy(offsetsForTerm, 0, offsetsAll, i * docIds.length, docIds.length);
+        }
+
+        return positionsFileReader.getTermData(arena, offsetsAll);
+    }
+
     public TermData[] getTermData(Arena arena,
@@ -156,15 +205,22 @@ public class FullReverseIndexReader {
             return ret;
         }
 
-        var reader = createReaderNew(offset);
+        var reader = getReader(offset);
 
         // Read the size and offset of the position data
-        var offsets = reader.queryData(docIds, 1);
+        var offsets = reader.getValueOffsets(docIds);
 
         return positionsFileReader.getTermData(arena, offsets);
     }
 
     public void close() {
+        try {
+            dataPool.close();
+        }
+        catch (Exception e) {
+            logger.warn("Error while closing bufferPool", e);
+        }
+
         if (documents != null)
             documents.close();
 
@@ -13,7 +13,7 @@ import java.nio.channels.FileChannel;
 public class PrioIndexEntrySource implements EntrySource {
     private final String name;
 
-    private final ByteBuffer readData = ByteBuffer.allocate(1024);
+    private final ByteBuffer readData = ByteBuffer.allocate(8*1024);
     private final BitReader bitReader = new BitReader(readData, this::fillReadBuffer);
 
     private final FileChannel docsFileChannel;
@@ -3,6 +3,7 @@ package nu.marginalia.index;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.LongArrayFactory;
 import nu.marginalia.btree.BTreeReader;
+import nu.marginalia.ffi.LinuxSystemCalls;
 import nu.marginalia.index.query.EmptyEntrySource;
 import nu.marginalia.index.query.EntrySource;
 import org.slf4j.Logger;
@@ -40,6 +41,8 @@ public class PrioReverseIndexReader {
 
         this.words = LongArrayFactory.mmapForReadingShared(words);
 
+        LinuxSystemCalls.madviseRandom(this.words.getMemorySegment());
+
         wordsBTreeReader = new BTreeReader(this.words, ReverseIndexParameters.wordsBTreeContext, 0);
         wordsDataOffset = wordsBTreeReader.getHeader().dataOffsetLongs();
 
@@ -5,7 +5,7 @@ import nu.marginalia.btree.model.BTreeContext;
 
 public class ReverseIndexParameters
 {
-    public static final BTreeContext prioDocsBTreeContext = new BTreeContext(5, 1, BTreeBlockSize.BS_2048);
-    public static final BTreeContext fullDocsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_2048);
-    public static final BTreeContext wordsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_2048);
+    public static final BTreeContext prioDocsBTreeContext = new BTreeContext(5, 1, BTreeBlockSize.BS_512);
+    public static final BTreeContext fullDocsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_512);
+    public static final BTreeContext wordsBTreeContext = new BTreeContext(5, 2, BTreeBlockSize.BS_512);
 }
@@ -14,62 +14,103 @@ import java.nio.file.StandardOpenOption;
  *
  * The positions data is concatenated in the file, with each term's metadata
  * followed by its positions. The metadata is a single byte, and the positions
- * are encoded using the Elias Gamma code, with zero padded bits at the end to
- * get octet alignment.
- *
+ * are encoded varints.
+ * <p></p>
  *
  * It is the responsibility of the caller to keep track of the byte offset of
 * each posting in the file.
 */
 public class PositionsFileConstructor implements AutoCloseable {
-    private final ByteBuffer workBuffer = ByteBuffer.allocate(65536);
-
     private final Path file;
     private final FileChannel channel;
 
     private long offset;
 
     public PositionsFileConstructor(Path file) throws IOException {
         this.file = file;
 
         channel = FileChannel.open(file, StandardOpenOption.CREATE, StandardOpenOption.WRITE);
     }
 
+    /** Represents a block of positions lists. Each writer thread should hold on to
+     * a block object to ensure the locality of its positions lists.
+     * When finished, commit() must be run.
+     * */
+    public class PositionsFileBlock {
+        private final ByteBuffer workBuffer = ByteBuffer.allocate(1024*1024*16);
+        private long position;
+
+        public PositionsFileBlock(long position) {
+            this.position = position;
+        }
+
+        public boolean fitsData(int size) {
+            return workBuffer.remaining() >= size;
+        }
+
+        public void commit() throws IOException {
+            workBuffer.position(0);
+            workBuffer.limit(workBuffer.capacity());
+            int pos = 0;
+            while (workBuffer.hasRemaining()) {
+                pos += channel.write(workBuffer, this.position + pos + workBuffer.position());
+            }
+        }
+
+        private void relocate() throws IOException {
+            workBuffer.clear();
+            position = channel.position();
+            while (workBuffer.hasRemaining()) {
+                channel.write(workBuffer);
+            }
+            workBuffer.clear();
+        }
+
+        public long position() {
+            return this.position + workBuffer.position();
+        }
+        public void put(byte b) {
+            workBuffer.put(b);
+        }
+        public void put(ByteBuffer buffer) {
+            workBuffer.put(buffer);
+        }
+    }
+
+    public PositionsFileBlock getBlock() throws IOException {
+        synchronized (this) {
+            var block = new PositionsFileBlock(channel.position());
+            block.relocate();
+            return block;
+        }
+    }
+
     /** Add a term to the positions file
      *
+     * @param block a block token to ensure data locality
     * @param termMeta the term metadata
     * @param positionsBuffer the positions of the term
     *
     * @return the offset of the term in the file, with the size of the data in the highest byte
     */
-    public long add(byte termMeta, ByteBuffer positionsBuffer) throws IOException {
-        synchronized (file) {
-            int size = 1 + positionsBuffer.remaining();
+    public long add(PositionsFileBlock block, byte termMeta, ByteBuffer positionsBuffer) throws IOException {
+        int size = 1 + positionsBuffer.remaining();
 
-            if (workBuffer.remaining() < size) {
-                workBuffer.flip();
-                channel.write(workBuffer);
-                workBuffer.clear();
+        if (!block.fitsData(size)) {
+            synchronized (this) {
+                block.commit();
+                block.relocate();
             }
+        }
+        synchronized (file) {
+            long offset = block.position();
 
-            workBuffer.put(termMeta);
-            workBuffer.put(positionsBuffer);
+            block.put(termMeta);
+            block.put(positionsBuffer);
 
-            long ret = PositionCodec.encode(size, offset);
-
-            offset += size;
-
-            return ret;
+            return PositionCodec.encode(size, offset);
         }
     }
 
     public void close() throws IOException {
-        if (workBuffer.hasRemaining()) {
-            workBuffer.flip();
-
-            while (workBuffer.hasRemaining())
-                channel.write(workBuffer);
-        }
-
         channel.force(false);
         channel.close();
     }
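Per the class comment, each writer thread is expected to hold one PositionsFileBlock for locality and commit it when done. Something along these lines; the surrounding loop, the `posting` accessors, and `offsetMap` are illustrative rather than taken from the codebase:

```java
// Illustrative caller of the new block-based API.
PositionsFileConstructor constructor = new PositionsFileConstructor(outputFile);
var block = constructor.getBlock();
for (var posting : postings) {
    // add() returns the encoded (offset, size) handle the caller must track
    long encoded = constructor.add(block, posting.meta(), posting.positions());
    offsetMap.put(posting.termId(), encoded);
}
block.commit();
```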
@@ -1,46 +0,0 @@
-package nu.marginalia.index.construction.full;
-
-import nu.marginalia.array.LongArray;
-import nu.marginalia.array.algo.LongArrayTransformations;
-import nu.marginalia.btree.BTreeWriter;
-import nu.marginalia.btree.model.BTreeContext;
-
-import java.io.IOException;
-
-/** Constructs the BTrees in a reverse index */
-public class FullIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
-    private final BTreeWriter writer;
-    private final int entrySize;
-    private final LongArray documentsArray;
-
-    long start = 0;
-    long writeOffset = 0;
-
-    public FullIndexBTreeTransformer(LongArray urlsFileMap,
-                                     int entrySize,
-                                     BTreeContext bTreeContext,
-                                     LongArray documentsArray) {
-        this.documentsArray = documentsArray;
-        this.writer = new BTreeWriter(urlsFileMap, bTreeContext);
-        this.entrySize = entrySize;
-    }
-
-    @Override
-    public long transform(long pos, long end) throws IOException {
-
-        final int size = (int) ((end - start) / entrySize);
-
-        if (size == 0) {
-            return -1;
-        }
-
-        final long offsetForBlock = writeOffset;
-
-        writeOffset += writer.write(writeOffset, size,
-                mapRegion -> mapRegion.transferFrom(documentsArray, start, 0, end - start)
-        );
-
-        start = end;
-        return offsetForBlock;
-    }
-}
@@ -0,0 +1,40 @@
+package nu.marginalia.index.construction.full;
+
+import nu.marginalia.array.LongArray;
+import nu.marginalia.array.algo.LongArrayTransformations;
+import nu.marginalia.skiplist.SkipListWriter;
+
+import java.io.IOException;
+import java.nio.file.Path;
+
+/** Constructs the skip lists in a reverse index */
+public class FullIndexSkipListTransformer implements LongArrayTransformations.LongIOTransformer, AutoCloseable {
+    private final SkipListWriter writer;
+    private final LongArray documentsArray;
+
+    long start = 0;
+
+    public FullIndexSkipListTransformer(Path docsOutputFile,
+                                        LongArray documentsArray) throws IOException {
+        this.documentsArray = documentsArray;
+        this.writer = new SkipListWriter(docsOutputFile);
+    }
+
+    @Override
+    public long transform(long pos, long end) throws IOException {
+        final int size = (int) ((end - start) / 2);
+
+        if (size == 0) {
+            return -1;
+        }
+
+        long offset = writer.writeList(documentsArray, start, size);
+        start = end;
+        return offset;
+    }
+
+    public void close() throws IOException {
+        writer.close();
+    }
+}
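FullPreindex drives this class through offsets.transformEachIO(...) in the next hunk. The contract appears to be that each slot of the offsets array holds the exclusive end of one term's range in documentsArray, and the value returned by transform() is written back into that slot, here the skip list's offset in the docs file. Roughly:

```java
// Hedged sketch of what transformEachIO presumably does with a
// LongIOTransformer; the real LongArray implementation may differ in details.
for (long i = 0; i < offsets.size(); i++) {
    long end = offsets.get(i);                     // end of term i's doc range
    offsets.set(i, transformer.transform(i, end)); // transformer advances its own `start`
}
```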
@@ -6,7 +6,6 @@ import nu.marginalia.btree.BTreeWriter;
 import nu.marginalia.index.ReverseIndexParameters;
 import nu.marginalia.index.construction.CountToOffsetTransformer;
 import nu.marginalia.index.construction.DocIdRewriter;
-import nu.marginalia.index.construction.IndexSizeEstimator;
 import nu.marginalia.index.construction.PositionsFileConstructor;
 import nu.marginalia.index.journal.IndexJournalPage;
 import org.slf4j.Logger;
@@ -81,15 +80,11 @@ public class FullPreindex {
 
         // Estimate the size of the docs index data
         offsets.transformEach(0, offsets.size(), new CountToOffsetTransformer(2));
-        IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(ReverseIndexParameters.fullDocsBTreeContext, 2);
-        offsets.fold(0, 0, offsets.size(), sizeEstimator);
 
         // Write the docs file
-        LongArray finalDocs = LongArrayFactory.mmapForWritingConfined(outputFileDocs, sizeEstimator.size);
-        offsets.transformEachIO(0, offsets.size(),
-                new FullIndexBTreeTransformer(finalDocs, 2,
-                        ReverseIndexParameters.fullDocsBTreeContext,
-                        documents.documents));
+        try (var transformer = new FullIndexSkipListTransformer(outputFileDocs, documents.documents)) {
+            offsets.transformEachIO(0, offsets.size(), transformer);
+        }
 
         LongArray wordIds = segments.wordIds;
 
@@ -102,7 +97,7 @@ public class FullPreindex {
         // Estimate the size of the words index data
         long wordsSize = ReverseIndexParameters.wordsBTreeContext.calculateSize((int) offsets.size());
 
-        // Construct the tree
+        // Construct the keywords tree
         LongArray wordsArray = LongArrayFactory.mmapForWritingConfined(outputFileWords, wordsSize);
 
         new BTreeWriter(wordsArray, ReverseIndexParameters.wordsBTreeContext)
@@ -113,8 +108,6 @@ public class FullPreindex {
             }
         });
 
-        finalDocs.force();
-        finalDocs.close();
         wordsArray.force();
         wordsArray.close();
 
@@ -12,10 +12,8 @@ import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
 import java.util.List;
 
 /** A LongArray with document data, segmented according to
@@ -52,11 +50,6 @@ public class FullPreindexDocuments {
         return new FullPreindexDocuments(docsFileMap, docsFile);
     }
 
-    public FileChannel createDocumentsFileChannel() throws IOException {
-        return (FileChannel) Files.newByteChannel(file, StandardOpenOption.READ);
-    }
-
-
     public LongArray slice(long start, long end) {
         return documents.range(start, end);
     }
@@ -86,6 +79,8 @@ public class FullPreindexDocuments {
         var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
         offsetMap.defaultReturnValue(0);
 
+        var positionsBlock = positionsFileConstructor.getBlock();
+
         while (docIds.hasRemaining()) {
             long docId = docIds.get();
             long rankEncodedId = docIdRewriter.rewriteDocId(docId);
@@ -101,12 +96,13 @@ public class FullPreindexDocuments {
                 ByteBuffer pos = tPos.get(i);
 
                 long offset = offsetMap.addTo(termId, RECORD_SIZE_LONGS);
-                long encodedPosOffset = positionsFileConstructor.add(meta, pos);
+                long encodedPosOffset = positionsFileConstructor.add(positionsBlock, meta, pos);
 
                 assembly.put(offset + 0, rankEncodedId);
                 assembly.put(offset + 1, encodedPosOffset);
             }
         }
+        positionsBlock.commit();
 
         assembly.write(docsFile);
     }
@@ -1,85 +1,69 @@
package nu.marginalia.index.positions;

import nu.marginalia.uring.UringFileReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.lang.foreign.Arena;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.lang.foreign.MemorySegment;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ForkJoinPool;
import java.util.List;

/** Reads positions data from the positions file */
public class PositionsFileReader implements AutoCloseable {

    // We use multiple file channels to avoid reads becoming serialized by the kernel.
    // If we don't do this, multi-threaded reads become strictly slower than single-threaded reads
    // (which is why AsynchronousFileChannel sucks).

    // This is likely the best option apart from O_DIRECT or FFI:ing in libaio or io_uring.

    private final FileChannel[] positions;
    private final ForkJoinPool forkJoinPool;
    private final UringFileReader uringFileReader;
    private static final Logger logger = LoggerFactory.getLogger(PositionsFileReader.class);

    public PositionsFileReader(Path positionsFile) throws IOException {
        this(positionsFile, 8);
    }

    public PositionsFileReader(Path positionsFile, int nreaders) throws IOException {
        positions = new FileChannel[nreaders];
        for (int i = 0; i < positions.length; i++) {
            positions[i] = FileChannel.open(positionsFile, StandardOpenOption.READ);
        if ((Files.size(positionsFile) & 4095) != 0) {
            throw new IllegalArgumentException("Positions file is not block aligned in size: " + Files.size(positionsFile));
        }
        forkJoinPool = new ForkJoinPool(nreaders);
        uringFileReader = new UringFileReader(positionsFile, true);
    }

    @Override
    public void close() throws IOException {
        for (FileChannel fc : positions) {
            fc.close();
        }
        forkJoinPool.close();
        uringFileReader.close();
    }

    /** Get the positions for a set of keywords in the index, as pointed out by the encoded offsets;
     * intermediate buffers are allocated from the provided arena allocator. */
    public TermData[] getTermData(Arena arena, long[] offsets) {
        TermData[] ret = new TermData[offsets.length];

        int tasks = 0;
        for (long l : offsets) if (l != 0) tasks++;

        CountDownLatch cl = new CountDownLatch(tasks);
        int cnt = 0;

        for (int i = 0; i < offsets.length; i++) {
            long encodedOffset = offsets[i];
            if (encodedOffset == 0) continue;

            int idx = i;
            int length = PositionCodec.decodeSize(encodedOffset);
            long offset = PositionCodec.decodeOffset(encodedOffset);
            ByteBuffer buffer = arena.allocate(length).asByteBuffer();

            forkJoinPool.execute(() -> {
                try {
                    positions[idx % positions.length].read(buffer, offset);
                    ret[idx] = new TermData(buffer);
                    cl.countDown();
                }
                catch (IOException ex) {
                    logger.error("Failed to read positions file", ex);
                }
            });
            cnt++;
        }

        try {
            cl.await();
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        if (cnt == 0) {
            return new TermData[offsets.length];
        }

        long[] readOffsets = new long[cnt];
        int[] readSizes = new int[cnt];

        for (int i = 0, j = 0; i < offsets.length; i++) {
            long encodedOffset = offsets[i];
            if (encodedOffset == 0) continue;

            readSizes[j] = PositionCodec.decodeSize(encodedOffset);
            readOffsets[j] = PositionCodec.decodeOffset(encodedOffset);
            j++;
        }

        List<MemorySegment> buffers = uringFileReader.readUnalignedInDirectMode(arena, readOffsets, readSizes, 4096);

        TermData[] ret = new TermData[offsets.length];
        for (int i = 0, j=0; i < offsets.length; i++) {
            long encodedOffset = offsets[i];
            if (encodedOffset == 0) continue;
            ret[i] = new TermData(buffers.get(j++).asByteBuffer());
        }

        return ret;
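For context on the code being removed here: the retained comments describe opening several FileChannels over the same file and spreading positional reads across them, since reads through a single channel get serialized by the kernel. A minimal sketch of that round-robin pattern using only standard NIO calls (variable names are illustrative, not the project's):

    // Sketch: round-robin positional reads over N channels to allow parallel I/O.
    FileChannel[] channels = new FileChannel[8];
    for (int i = 0; i < channels.length; i++) {
        channels[i] = FileChannel.open(positionsFile, StandardOpenOption.READ);
    }
    ByteBuffer buf = ByteBuffer.allocate(length);
    channels[taskIndex % channels.length].read(buf, offset);  // positional read; no shared file pointer

The replacement code instead hands every offset/size pair to a single UringFileReader, which batches them into one io_uring submission rather than fanning out threads.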
@@ -1,24 +1,22 @@
package nu.marginalia.index.query;

import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.skiplist.SkipListReader;

public record ReverseIndexRejectFilter(BTreeReader range) implements QueryFilterStepIf {
public record ReverseIndexRejectFilter(SkipListReader range, IndexSearchBudget budget) implements QueryFilterStepIf {

    @Override
    public void apply(LongQueryBuffer buffer) {
        range.rejectEntries(buffer);
        while (budget.hasTimeLeft() && range.tryRejectData(buffer));

        buffer.finalizeFiltering();
    }

    public boolean test(long id) {
        return range.findEntry(id) < 0;
    }

    @Override
    public double cost() {
        return range.numEntries();
        return 1;
    }

    @Override
@@ -1,24 +1,21 @@
package nu.marginalia.index.query;

import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.skiplist.SkipListReader;

public record ReverseIndexRetainFilter(BTreeReader range, String name, long wordId) implements QueryFilterStepIf {
public record ReverseIndexRetainFilter(SkipListReader range, String name, long wordId, IndexSearchBudget budget) implements QueryFilterStepIf {

    @Override
    public void apply(LongQueryBuffer buffer) {
        range.retainEntries(buffer);
        buffer.finalizeFiltering();
    }
        while (budget.hasTimeLeft() && range.tryRetainData(buffer));

    public boolean test(long id) {
        return range.findEntry(id) >= 0;
        buffer.finalizeFiltering();
    }

    @Override
    public double cost() {
        return range.numEntries();
        return 1;
    }

    @Override
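Both filters now drive the skip-list in budget-checked steps: tryRejectData/tryRetainData each process a bounded chunk of the buffer and report whether more work remains, so a pathological term cannot run past the query deadline. A sketch of the loop shape, under the assumption (implied by this diff) that the try* methods return true while data is left:

    // Sketch: applying a filter step in slices, stopping when the time budget lapses.
    public void apply(LongQueryBuffer buffer) {
        while (budget.hasTimeLeft() && range.tryRetainData(buffer))
            ;  // each iteration consumes one bounded chunk
        buffer.finalizeFiltering();
    }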
@@ -33,9 +33,11 @@ class PositionsFileReaderTest {
    void getTermData() throws IOException {
        long key1, key2, key3;
        try (PositionsFileConstructor constructor = new PositionsFileConstructor(file)) {
            key1 = constructor.add((byte) 43, VarintCodedSequence.generate(1, 2, 3).buffer());
            key2 = constructor.add((byte) 51, VarintCodedSequence.generate(2, 3, 5, 1000, 5000, 20241).buffer());
            key3 = constructor.add((byte) 61, VarintCodedSequence.generate(3, 5, 7).buffer());
            var block = constructor.getBlock();
            key1 = constructor.add(block, (byte) 43, VarintCodedSequence.generate(1, 2, 3).buffer());
            key2 = constructor.add(block, (byte) 51, VarintCodedSequence.generate(2, 3, 5, 1000, 5000, 20241).buffer());
            key3 = constructor.add(block, (byte) 61, VarintCodedSequence.generate(3, 5, 7).buffer());
            block.commit();
        }

        System.out.println("key1: " + Long.toHexString(key1));
@@ -1,149 +0,0 @@
package nu.marginalia.index.construction.full;

import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.btree.model.BTreeHeader;
import nu.marginalia.hash.MurmurHash3_128;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.PositionsFileConstructor;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

import static nu.marginalia.index.construction.full.TestJournalFactory.EntryDataWithWordMeta;
import static nu.marginalia.index.construction.full.TestJournalFactory.wm;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

class FullPreindexFinalizeTest {
    TestJournalFactory journalFactory;
    Path positionsFile;
    Path countsFile;
    Path wordsIdFile;
    Path docsFile;
    Path tempDir;

    @BeforeEach
    public void setUp() throws IOException {
        journalFactory = new TestJournalFactory();

        positionsFile = Files.createTempFile("positions", ".dat");
        countsFile = Files.createTempFile("counts", ".dat");
        wordsIdFile = Files.createTempFile("words", ".dat");
        docsFile = Files.createTempFile("docs", ".dat");
        tempDir = Files.createTempDirectory("sort");
    }

    @AfterEach
    public void tearDown() throws IOException {
        journalFactory.clear();

        Files.deleteIfExists(countsFile);
        Files.deleteIfExists(wordsIdFile);
        List<Path> contents = new ArrayList<>();
        Files.list(tempDir).forEach(contents::add);
        for (var tempFile : contents) {
            Files.delete(tempFile);
        }
        Files.delete(tempDir);
    }

    MurmurHash3_128 hash = new MurmurHash3_128();
    long termId(String keyword) {
        return hash.hashKeyword(keyword);
    }

    @Test
    public void testFinalizeSimple() throws IOException {
        var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51)));
        var preindex = FullPreindex.constructPreindex(reader,
                new PositionsFileConstructor(positionsFile),
                DocIdRewriter.identity(), tempDir);


        preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat"));
        preindex.delete();

        Path wordsFile = tempDir.resolve("words.dat");
        Path docsFile = tempDir.resolve("docs.dat");

        assertTrue(Files.exists(wordsFile));
        assertTrue(Files.exists(docsFile));

        System.out.println(Files.size(wordsFile));
        System.out.println(Files.size(docsFile));

        var docsArray = LongArrayFactory.mmapForReadingConfined(docsFile);
        var wordsArray = LongArrayFactory.mmapForReadingConfined(wordsFile);

        var docsHeader = new BTreeHeader(docsArray, 0);
        var wordsHeader = new BTreeHeader(wordsArray, 0);

        assertEquals(1, docsHeader.numEntries());
        assertEquals(1, wordsHeader.numEntries());

        assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));
        assertEquals(termId("50"), wordsArray.get(wordsHeader.dataOffsetLongs()));
    }


    @Test
    public void testFinalizeSimple2x2() throws IOException {
        var reader = journalFactory.createReader(
                new EntryDataWithWordMeta(100, 101, wm(50, 51)),
                new EntryDataWithWordMeta(101, 101, wm(51, 52))
        );

        var preindex = FullPreindex.constructPreindex(reader,
                new PositionsFileConstructor(positionsFile),
                DocIdRewriter.identity(), tempDir);

        preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat"));
        preindex.delete();

        Path wordsFile = tempDir.resolve("words.dat");
        Path docsFile = tempDir.resolve("docs.dat");

        assertTrue(Files.exists(wordsFile));
        assertTrue(Files.exists(docsFile));

        System.out.println(Files.size(wordsFile));
        System.out.println(Files.size(docsFile));

        var docsArray = LongArrayFactory.mmapForReadingConfined(docsFile);
        var wordsArray = LongArrayFactory.mmapForReadingConfined(wordsFile);


        var wordsHeader = new BTreeHeader(wordsArray, 0);

        System.out.println(wordsHeader);

        assertEquals(2, wordsHeader.numEntries());

        long offset1 = wordsArray.get(wordsHeader.dataOffsetLongs() + 1);
        long offset2 = wordsArray.get(wordsHeader.dataOffsetLongs() + 3);

        assertEquals(termId("50"), wordsArray.get(wordsHeader.dataOffsetLongs()));
        assertEquals(termId("50"), wordsArray.get(wordsHeader.dataOffsetLongs()));

        BTreeHeader docsHeader;

        docsHeader = new BTreeHeader(docsArray, offset1);
        System.out.println(docsHeader);
        assertEquals(1, docsHeader.numEntries());

        assertEquals(100, docsArray.get(docsHeader.dataOffsetLongs() + 0));

        docsHeader = new BTreeHeader(docsArray, offset2);
        System.out.println(docsHeader);
        assertEquals(1, docsHeader.numEntries());

        assertEquals(101, docsArray.get(docsHeader.dataOffsetLongs() + 0));
    }
}
@@ -10,19 +10,37 @@ import nu.marginalia.index.query.IndexQuery;
import nu.marginalia.index.query.IndexSearchBudget;
import nu.marginalia.index.results.IndexResultRankingService;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
import nu.marginalia.skiplist.SkipListConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicLong;

/** Performs an index query */
public class IndexQueryExecution {

    private static final int indexValuationThreads = Integer.getInteger("index.valuationThreads", 16);
    private static final int indexPreparationThreads = Integer.getInteger("index.preparationThreads", 4);

    private static final ForkJoinPool lookupPool = new ForkJoinPool(indexValuationThreads);
    private static final ForkJoinPool evaluationPool = new ForkJoinPool(indexValuationThreads);
    // Since most NVMe drives have a maximum read size of 128 KB, and most small reads are 512B
    // this should probably be 128*1024 / 512 = 256 to reduce queue depth and optimize tail latency
    private static final int evaluationBatchSize = 256;

    // This should probably be SkipListConstants.BLOCK_SIZE / 16 in order to reduce the number of unnecessary read
    // operations per lookup and again optimize tail latency
    private static final int lookupBatchSize = SkipListConstants.BLOCK_SIZE / 16;

    private static final AtomicLong lookupTime = new AtomicLong();
    private static final AtomicLong prepTime = new AtomicLong();
    private static final AtomicLong valuationTime = new AtomicLong();

    private static final ExecutorService threadPool = new ThreadPoolExecutor(indexValuationThreads, Integer.MAX_VALUE, 60L, TimeUnit.SECONDS, new SynchronousQueue<>());
    private static final Logger log = LoggerFactory.getLogger(IndexQueryExecution.class);

    private final IndexResultRankingService rankingService;

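The two batch constants fall out of simple device arithmetic: with a 128 KB maximum transfer per NVMe read and roughly 512-byte small reads, 128 * 1024 / 512 = 256 documents per evaluation batch keeps queue depth low. A sketch of the derivation; the 4 KB skip-list block size below is an assumed illustration, the real value lives in SkipListConstants:

    // Sketch: relating the batch constants to device and on-disk block sizes.
    int maxNvmeRead = 128 * 1024;                           // typical max transfer per read
    int smallReadSize = 512;                                // typical small-read granularity
    int evaluationBatchSize = maxNvmeRead / smallReadSize;  // = 256
    int lookupBatchSize = 4096 / 16;                        // SkipListConstants.BLOCK_SIZE / 16, assuming 4 KB blocks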
@@ -30,12 +48,32 @@ public class IndexQueryExecution {
    private final List<IndexQuery> queries;
    private final IndexSearchBudget budget;
    private final ResultPriorityQueue resultHeap;
    private final CountDownLatch executionCountdown;
    private final CountDownLatch lookupCountdown;
    private final CountDownLatch preparationCountdown;
    private final CountDownLatch rankingCountdown;

    private final ArrayBlockingQueue<CombinedDocIdList> fullPreparationQueue = new ArrayBlockingQueue<>(8, true);
    private final ArrayBlockingQueue<CombinedDocIdList> priorityPreparationQueue = new ArrayBlockingQueue<>(8, true);
    private final ArrayBlockingQueue<IndexResultRankingService.RankingData> fullEvaluationQueue = new ArrayBlockingQueue<>(8, true);
    private final ArrayBlockingQueue<IndexResultRankingService.RankingData> priorityEvaluationQueue = new ArrayBlockingQueue<>(8, true);

    private final int limitTotal;
    private final int limitByDomain;

    private int evaluationJobCounter;
    static {
        Thread.ofPlatform().daemon().start(() -> {
            for (;;) {
                try {
                    TimeUnit.SECONDS.sleep(10);
                }
                catch (InterruptedException e) {
                    e.printStackTrace();
                    break;
                }
                log.info("Lookup: {}, Valuation: {}, Prep Time: {}", lookupTime.get() / 1_000_000_000., valuationTime.get() / 1_000_000_000., prepTime.get() / 1_000_000_000.);
            }
        });
    }

    public IndexQueryExecution(SearchParameters params,
                               IndexResultRankingService rankingService,
@@ -49,84 +87,137 @@ public class IndexQueryExecution {
        limitTotal = params.limitTotal;

        rankingContext = ResultRankingContext.create(currentIndex, params);
        queries = currentIndex.createQueries(new SearchTerms(params.query, params.compiledQueryIds), params.queryParams);
        executionCountdown = new CountDownLatch(queries.size());
        queries = currentIndex.createQueries(new SearchTerms(params.query, params.compiledQueryIds), params.queryParams, budget);

        evaluationJobCounter = 0;
        lookupCountdown = new CountDownLatch(queries.size());
        preparationCountdown = new CountDownLatch(indexPreparationThreads * 2);
        rankingCountdown = new CountDownLatch(indexValuationThreads * 2);
    }

    public List<RpcDecoratedResultItem> run() throws InterruptedException, SQLException {
        // Spawn lookup tasks for each query
        for (IndexQuery query : queries) {
            lookupPool.execute(() -> lookup(query));
            threadPool.submit(() -> lookup(query));
        }

        // Await lookup task termination (this guarantees we're no longer creating new evaluation tasks)
        executionCountdown.await();
        for (int i = 0; i < indexPreparationThreads; i++) {
            threadPool.submit(() -> prepare(priorityPreparationQueue, priorityEvaluationQueue));
            threadPool.submit(() -> prepare(fullPreparationQueue, fullEvaluationQueue));
        }

        // Await evaluation task termination
        synchronized (IndexQueryExecution.this) {
            while (evaluationJobCounter > 0 && budget.hasTimeLeft()) {
                IndexQueryExecution.this.wait(budget.timeLeft());
            }
        // Spawn evaluation tasks
        for (int i = 0; i < indexValuationThreads; i++) {
            threadPool.submit(() -> evaluate(priorityEvaluationQueue));
            threadPool.submit(() -> evaluate(fullEvaluationQueue));
        }

        // Await lookup task termination
        lookupCountdown.await();
        preparationCountdown.await();
        rankingCountdown.await();

        // Deallocate any leftover ranking data buffers
        for (var data : priorityEvaluationQueue) {
            data.close();
        }
        for (var data : fullEvaluationQueue) {
            data.close();
        }

        // Final result selection
        return rankingService.selectBestResults(limitByDomain, limitTotal, rankingContext, resultHeap);
        return rankingService.selectBestResults(limitByDomain, limitTotal, rankingContext, resultHeap.toList());
    }

    private void lookup(IndexQuery query) {
        final LongQueryBuffer buffer = new LongQueryBuffer(8192);
    private List<Future<?>> lookup(IndexQuery query) {
        final LongQueryBuffer buffer = new LongQueryBuffer(lookupBatchSize);
        List<Future<?>> evaluationJobs = new ArrayList<>();
        try {
            while (query.hasMore() && budget.hasTimeLeft()) {

                buffer.reset();
                buffer.zero();

                long st = System.nanoTime();
                query.getMoreResults(buffer);
                long et = System.nanoTime();
                lookupTime.addAndGet(et - st);

                if (buffer.isEmpty())
                    continue;

                CombinedDocIdList docIds = new CombinedDocIdList(buffer);
                var queue = query.isPrioritized() ? priorityPreparationQueue : fullPreparationQueue;

                boolean stealWork = false;
                synchronized (IndexQueryExecution.this) {
                    // Hold off on spawning new evaluation jobs if we have too many queued
                    // to avoid backpressure, instead steal work into the lookup thread
                    // in this scenario
                if (buffer.end <= evaluationBatchSize) {
                    var docIds = new CombinedDocIdList(buffer);

                    if (evaluationJobCounter > indexValuationThreads * 8) {
                        stealWork = true;
                    }
                    else {
                        evaluationJobCounter++;
                    }
                }

                if (stealWork) {
                    resultHeap.addAll(rankingService.rankResults(rankingContext, budget, docIds, false));
                    if (!queue.offer(docIds, Math.max(1, budget.timeLeft()), TimeUnit.MILLISECONDS))
                        break;
                }
                else {
                    // Spawn an evaluation task
                    evaluationPool.execute(() -> evaluate(docIds));
                    long[] bufferData = buffer.copyData();
                    for (int start = 0; start < bufferData.length; start+= evaluationBatchSize) {

                        long[] slice = Arrays.copyOfRange(bufferData, start,
                                Math.min(start + evaluationBatchSize, bufferData.length));

                        var docIds = new CombinedDocIdList(slice);

                        if (!queue.offer(docIds, Math.max(1, budget.timeLeft()), TimeUnit.MILLISECONDS))
                            break;

                    }
                }
            }
        } catch (RuntimeException | InterruptedException ex) {
            log.error("Exception in lookup thread", ex);
        } finally {
            buffer.dispose();
            executionCountdown.countDown();
            lookupCountdown.countDown();
        }

        return evaluationJobs;
    }

    private void prepare(ArrayBlockingQueue<CombinedDocIdList> inputQueue, ArrayBlockingQueue<IndexResultRankingService.RankingData> outputQueue) {
        try {
            while (budget.hasTimeLeft() && (lookupCountdown.getCount() > 0 || !inputQueue.isEmpty())) {
                var docIds = inputQueue.poll(Math.clamp(budget.timeLeft(), 1, 5), TimeUnit.MILLISECONDS);
                if (docIds == null) continue;
                long st = System.nanoTime();
                var preparedData = rankingService.prepareRankingData(rankingContext, docIds, budget);
                long et = System.nanoTime();
                prepTime.addAndGet(et - st);
                if (!outputQueue.offer(preparedData, Math.max(1, budget.timeLeft()), TimeUnit.MILLISECONDS))
                    preparedData.close();
            }
        } catch (TimeoutException ex) {
            // This is normal
        } catch (Exception ex) {
            if (!(ex.getCause() instanceof InterruptedException)) {
                log.error("Exception in preparation thread", ex);
            } // suppress logging for interrupted ex
        } finally {
            preparationCountdown.countDown();
        }
    }

    private void evaluate(CombinedDocIdList docIds) {
    private void evaluate(ArrayBlockingQueue<IndexResultRankingService.RankingData> queue) {
        try {
            if (!budget.hasTimeLeft())
                return;
            resultHeap.addAll(rankingService.rankResults(rankingContext, budget, docIds, false));
        } finally {
            synchronized (IndexQueryExecution.this) {
                if (--evaluationJobCounter == 0) {
                    IndexQueryExecution.this.notifyAll();
            while (budget.hasTimeLeft() && (preparationCountdown.getCount() > 0 || !queue.isEmpty())) {
                var rankingData = queue.poll(Math.clamp(budget.timeLeft(), 1, 5), TimeUnit.MILLISECONDS);
                if (rankingData == null) continue;

                try (rankingData) {
                    long st = System.nanoTime();
                    resultHeap.addAll(rankingService.rankResults(budget, rankingContext, rankingData, false));
                    long et = System.nanoTime();
                    valuationTime.addAndGet(et - st);
                }
            }
        } catch (Exception ex) {
            if (!(ex.getCause() instanceof InterruptedException)) {
                log.error("Exception in evaluation thread", ex);
            } // suppress logging for interrupted ex
        } finally {
            rankingCountdown.countDown();
        }
    }

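Taken together, run() now wires up a three-stage pipeline: lookup threads push document-id batches into bounded preparation queues, preparation threads turn those into RankingData (the expensive I/O), and evaluation threads score them into the result heap. Each stage exits when its budget lapses, or when its upstream latch hits zero and its queue drains. A condensed sketch of the shape, with the project's types elided:

    // Sketch: a bounded two-queue pipeline with budget-aware handoffs.
    var stage1to2 = new ArrayBlockingQueue<long[]>(8, true);   // lookup -> prepare
    var stage2to3 = new ArrayBlockingQueue<Object>(8, true);   // prepare -> evaluate

    // producer side: offer with a deadline so a full queue cannot stall past the budget
    if (!stage1to2.offer(batch, Math.max(1, budget.timeLeft()), TimeUnit.MILLISECONDS))
        return;  // downstream saturated and time is short; drop the work

    // consumer side: poll briefly, stop once upstream is done and the queue is empty
    while (budget.hasTimeLeft() && (upstreamLatch.getCount() > 0 || !stage1to2.isEmpty())) {
        long[] batch = stage1to2.poll(Math.clamp(budget.timeLeft(), 1, 5), TimeUnit.MILLISECONDS);
        if (batch == null) continue;
        // ... transform the batch, then offer it to stage2to3 the same way ...
    }

The fair (true) ArrayBlockingQueues and small capacity of 8 give natural backpressure: a slow stage throttles its producer instead of letting unbounded work pile up.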
@@ -5,9 +5,7 @@ import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import org.jetbrains.annotations.NotNull;

import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.*;

/** A priority queue for search results. This class is not thread-safe,
 * in general, except for concurrent use of the addAll method.
@@ -46,6 +44,10 @@ public class ResultPriorityQueue implements Iterable<SearchResultItem> {
        return true;
    }

    public synchronized List<SearchResultItem> toList() {
        return new ArrayList<>(queue);
    }

    public int size() {
        return queue.size();
    }
@@ -5,14 +5,17 @@ import it.unimi.dsi.fastutil.longs.LongList;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.index.FullReverseIndexReader;
import nu.marginalia.index.PrioReverseIndexReader;
import nu.marginalia.index.forward.ForwardIndexReader;
import nu.marginalia.index.forward.spans.DocumentSpans;
import nu.marginalia.index.model.QueryParams;
import nu.marginalia.index.model.SearchTerms;
import nu.marginalia.index.positions.TermData;
import nu.marginalia.index.query.IndexQuery;
import nu.marginalia.index.query.IndexQueryBuilder;
import nu.marginalia.index.query.IndexSearchBudget;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.index.results.model.ids.CombinedDocIdList;
@@ -25,6 +28,7 @@ import org.slf4j.LoggerFactory;
import java.lang.foreign.Arena;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
@@ -55,20 +59,19 @@ public class CombinedIndexReader {
        return new IndexQueryBuilderImpl(reverseIndexFullReader, query);
    }

    public QueryFilterStepIf hasWordFull(long termId) {
        return reverseIndexFullReader.also(termId);
    public QueryFilterStepIf hasWordFull(long termId, IndexSearchBudget budget) {
        return reverseIndexFullReader.also(termId, budget);
    }

    /** Creates a query builder for terms in the priority index */
    public IndexQueryBuilder findPriorityWord(long wordId) {
        return newQueryBuilder(new IndexQuery(reverseIndexPriorityReader.documents(wordId)))
        return newQueryBuilder(new IndexQuery(reverseIndexPriorityReader.documents(wordId), true))
                .withSourceTerms(wordId);
    }

    /** Creates a query builder for terms in the full index */
    public IndexQueryBuilder findFullWord(long wordId) {
        return newQueryBuilder(
                new IndexQuery(reverseIndexFullReader.documents(wordId)))
        return newQueryBuilder(new IndexQuery(reverseIndexFullReader.documents(wordId), false))
                .withSourceTerms(wordId);
    }

@@ -82,7 +85,12 @@ public class CombinedIndexReader {
        return reverseIndexFullReader.numDocuments(word);
    }

    public List<IndexQuery> createQueries(SearchTerms terms, QueryParams params) {
    /** Reset caches and buffers */
    public void reset() {
        reverseIndexFullReader.reset();
    }

    public List<IndexQuery> createQueries(SearchTerms terms, QueryParams params, IndexSearchBudget budget) {

        if (!isLoaded()) {
            logger.warn("Index reader not ready");
@@ -123,7 +131,7 @@ public class CombinedIndexReader {
                    continue;
                }

                head.addInclusionFilter(hasWordFull(termId));
                head.addInclusionFilter(hasWordFull(termId, budget));
            }
            queryHeads.add(head);
        }
@@ -132,7 +140,7 @@ public class CombinedIndexReader {
            if (paths.size() < 4) {
                var prioHead = findPriorityWord(elements.getLong(0));
                for (int i = 1; i < elements.size(); i++) {
                    prioHead.addInclusionFilter(hasWordFull(elements.getLong(i)));
                    prioHead.addInclusionFilter(hasWordFull(elements.getLong(i), budget));
                }
                queryHeads.add(prioHead);
            }
@@ -143,11 +151,11 @@ public class CombinedIndexReader {

            // Advice terms are a special case, mandatory but not ranked, and exempt from re-writing
            for (long term : terms.advice()) {
                query = query.also(term);
                query = query.also(term, budget);
            }

            for (long term : terms.excludes()) {
                query = query.not(term);
                query = query.not(term, budget);
            }

            // Run these filter steps last, as they'll worst-case cause as many page faults as there are
@@ -178,6 +186,18 @@ public class CombinedIndexReader {
    }

    /** Retrieves the term metadata for the specified word for the provided documents */
    public TermMetadataList[] getTermMetadata(Arena arena,
                                              long[] wordIds,
                                              CombinedDocIdList docIds)
    {
        TermData[] combinedTermData = reverseIndexFullReader.getTermData(arena, wordIds, docIds.array());
        TermMetadataList[] ret = new TermMetadataList[wordIds.length];
        for (int i = 0; i < wordIds.length; i++) {
            ret[i] = new TermMetadataList(Arrays.copyOfRange(combinedTermData, i*docIds.size(), (i+1)*docIds.size()));
        }
        return ret;
    }

    public TermMetadataList getTermMetadata(Arena arena,
                                            long wordId,
                                            CombinedDocIdList docIds)
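The new array variant above fetches term data for all words in one call and returns a flat array laid out row-major by word, so the slice for word i spans [i * docIds.size(), (i + 1) * docIds.size()). The index arithmetic in isolation:

    // Sketch: locating the entry for (word i, document j) in the flat result array.
    TermData entry = combinedTermData[i * docIds.size() + j];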
@@ -263,6 +283,23 @@ class ParamMatchingQueryFilter implements QueryFilterStepIf {
    }

    @Override
    public void apply(LongQueryBuffer buffer) {
        if (!imposesMetaConstraint && !params.searchSet().imposesConstraint()) {
            return;
        }

        while (buffer.hasMore()) {
            if (test(buffer.currentValue())) {
                buffer.retainAndAdvance();
            }
            else {
                buffer.rejectAndAdvance();
            }
        }

        buffer.finalizeFiltering();
    }

    public boolean test(long combinedId) {
        long docId = UrlIdCodec.removeRank(combinedId);
        int domainId = UrlIdCodec.getDomainId(docId);
@@ -348,4 +385,5 @@ class ParamMatchingQueryFilter implements QueryFilterStepIf {
    public String describe() {
        return getClass().getSimpleName();
    }

}
@@ -1,11 +1,10 @@
package nu.marginalia.index.index;

import java.util.List;
import gnu.trove.set.hash.TLongHashSet;
import nu.marginalia.index.FullReverseIndexReader;
import nu.marginalia.index.query.IndexQuery;
import nu.marginalia.index.query.IndexQueryBuilder;
import nu.marginalia.index.query.filter.QueryFilterAnyOf;
import nu.marginalia.index.query.IndexSearchBudget;
import nu.marginalia.index.query.filter.QueryFilterStepIf;

public class IndexQueryBuilderImpl implements IndexQueryBuilder {
@@ -32,18 +31,18 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
        return this;
    }

    public IndexQueryBuilder also(long termId) {
    public IndexQueryBuilder also(long termId, IndexSearchBudget budget) {

        if (alreadyConsideredTerms.add(termId)) {
            query.addInclusionFilter(reverseIndexFullReader.also(termId));
            query.addInclusionFilter(reverseIndexFullReader.also(termId, budget));
        }

        return this;
    }

    public IndexQueryBuilder not(long termId) {
    public IndexQueryBuilder not(long termId, IndexSearchBudget budget) {

        query.addInclusionFilter(reverseIndexFullReader.not(termId));
        query.addInclusionFilter(reverseIndexFullReader.not(termId, budget));

        return this;
    }
@@ -55,20 +54,6 @@ public class IndexQueryBuilderImpl implements IndexQueryBuilder {
        return this;
    }

    public IndexQueryBuilder addInclusionFilterAny(List<QueryFilterStepIf> filterSteps) {
        if (filterSteps.isEmpty())
            return this;

        if (filterSteps.size() == 1) {
            query.addInclusionFilter(filterSteps.getFirst());
        }
        else {
            query.addInclusionFilter(new QueryFilterAnyOf(filterSteps));
        }

        return this;
    }

    public IndexQuery build() {
        return query;
    }
@@ -43,7 +43,7 @@ public class SearchParameters {
        var limits = specsSet.queryLimits;

        this.fetchSize = limits.getFetchSize();
        this.budget = new IndexSearchBudget(limits.getTimeoutMs());
        this.budget = new IndexSearchBudget(Math.max(limits.getTimeoutMs()/2, limits.getTimeoutMs()-50));
        this.query = specsSet.query;
        this.limitByDomain = limits.getResultsByDomain();
        this.limitTotal = limits.getResultsTotal();
@@ -67,9 +67,7 @@ public class SearchParameters {

        this.fetchSize = limits.getFetchSize();

        // The time budget is halved because this is the point when we start to
        // wrap up the search and return the results.
        this.budget = new IndexSearchBudget(limits.getTimeoutMs() / 2);
        this.budget = new IndexSearchBudget(Math.max(limits.getTimeoutMs()/2, limits.getTimeoutMs()-50));
        this.query = IndexProtobufCodec.convertRpcQuery(request.getQuery());

        this.limitByDomain = limits.getResultsByDomain();
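The new budget formula max(timeoutMs / 2, timeoutMs - 50) effectively reserves a flat ~50 ms for wrapping up the search on realistic timeouts, while the old halve-the-timeout behavior only survives for very short budgets. Worked examples:

    // Sketch: how the reserved wrap-up time behaves at different timeouts.
    Math.max(1000 / 2, 1000 - 50);  // = 950 ms of search budget for a 1 s timeout
    Math.max(100 / 2, 100 - 50);    // = 50 ms for a 100 ms timeout (matches the old behavior)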
@@ -14,7 +14,6 @@ import nu.marginalia.api.searchquery.model.query.SearchPhraseConstraint;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.api.searchquery.model.results.debug.DebugRankingFactors;
import nu.marginalia.index.ResultPriorityQueue;
import nu.marginalia.index.forward.spans.DocumentSpans;
import nu.marginalia.index.index.CombinedIndexReader;
import nu.marginalia.index.index.StatefulIndex;
@@ -32,12 +31,15 @@ import nu.marginalia.sequence.CodedSequence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.lang.foreign.Arena;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;

@Singleton
public class IndexResultRankingService {
@@ -57,88 +59,132 @@ public class IndexResultRankingService {
        this.domainRankingOverrides = domainRankingOverrides;
    }

    public List<SearchResultItem> rankResults(
            ResultRankingContext rankingContext,
            IndexSearchBudget budget,
            CombinedDocIdList resultIds,
            boolean exportDebugData)
    {
        if (resultIds.isEmpty())
            return List.of();
    public RankingData prepareRankingData(ResultRankingContext rankingContext, CombinedDocIdList resultIds, @Nullable IndexSearchBudget budget) throws TimeoutException {
        return new RankingData(rankingContext, resultIds, budget);
    }

        IndexResultScoreCalculator resultRanker = new IndexResultScoreCalculator(statefulIndex, domainRankingOverrides, rankingContext);
    public final class RankingData implements AutoCloseable {
        final Arena arena;

        List<SearchResultItem> results = new ArrayList<>(resultIds.size());
        private final TermMetadataList[] termsForDocs;
        private final DocumentSpans[] documentSpans;
        private final long[] flags;
        private final CodedSequence[] positions;
        private final CombinedDocIdList resultIds;
        private final QuerySearchTerms searchTerms;
        private AtomicBoolean closed = new AtomicBoolean(false);
        int pos = -1;

        // Get the current index reader, which is the one we'll use for this calculation,
        // this may change during the calculation, but we don't want to switch over mid-calculation
        final CombinedIndexReader currentIndex = statefulIndex.get();
        public RankingData(ResultRankingContext rankingContext, CombinedDocIdList resultIds, @Nullable IndexSearchBudget budget) throws TimeoutException {
            this.resultIds = resultIds;
            this.arena = Arena.ofShared();

        final QuerySearchTerms searchTerms = getSearchTerms(rankingContext.compiledQuery, rankingContext.searchQuery);
        final int termCount = searchTerms.termIdsAll.size();
            this.searchTerms = getSearchTerms(rankingContext.compiledQuery, rankingContext.searchQuery);
            final int termCount = searchTerms.termIdsAll.size();

        // We use an arena for the position and spans data to limit gc pressure
        try (var arena = Arena.ofShared()) {
            this.flags = new long[termCount];
            this.positions = new CodedSequence[termCount];

            TermMetadataList[] termsForDocs = new TermMetadataList[termCount];
            for (int ti = 0; ti < termCount; ti++) {
                termsForDocs[ti] = currentIndex.getTermMetadata(arena, searchTerms.termIdsAll.at(ti), resultIds);
            }
            // Get the current index reader, which is the one we'll use for this calculation,
            // this may change during the calculation, but we don't want to switch over mid-calculation

            // Data for the document. We arrange this in arrays outside the calculation function to avoid
            // hash lookups in the inner loop, as it's hot code, and we don't want unnecessary cpu cache
            // thrashing in there; out here we can rely on implicit array ordering to match up the data.
            final CombinedIndexReader currentIndex = statefulIndex.get();

            long[] flags = new long[termCount];
            CodedSequence[] positions = new CodedSequence[termCount];
            DocumentSpans[] documentSpans = currentIndex.getDocumentSpans(arena, resultIds);
            // Perform expensive I/O operations

            // Iterate over documents by their index in the combinedDocIds, as we need the index for the
            // term data arrays as well
            this.termsForDocs = currentIndex.getTermMetadata(arena, searchTerms.termIdsAll.array, resultIds);
            if (!budget.hasTimeLeft())
                throw new TimeoutException();
            this.documentSpans = currentIndex.getDocumentSpans(arena, resultIds);
        }

            for (int i = 0; i < resultIds.size() && budget.hasTimeLeft(); i++) {
        public CodedSequence[] positions() {
            return positions;
        }
        public long[] flags() {
            return flags;
        }
        public long resultId() {
            return resultIds.at(pos);
        }
        public DocumentSpans documentSpans() {
            return documentSpans[pos];
        }

                // Prepare term-level data for the document
        public boolean next() {
            if (++pos < resultIds.size()) {
                for (int ti = 0; ti < flags.length; ti++) {
                    var tfd = termsForDocs[ti];

                    assert tfd != null : "No term data for term " + ti;

                    flags[ti] = tfd.flag(i);
                    positions[ti] = tfd.position(i);
                    flags[ti] = tfd.flag(pos);
                    positions[ti] = tfd.position(pos);
                }
                return true;
            }
            return false;
        }

                // Ignore documents that don't match the mandatory constraints
                if (!searchTerms.phraseConstraints.testMandatory(positions)) {
                    continue;
                }
        public int size() {
            return resultIds.size();
        }

                if (!exportDebugData) {
                    var score = resultRanker.calculateScore(null, resultIds.at(i), searchTerms, flags, positions, documentSpans[i]);
                    if (score != null) {
                        results.add(score);
                    }
                }
                else {
                    var rankingFactors = new DebugRankingFactors();
                    var score = resultRanker.calculateScore( rankingFactors, resultIds.at(i), searchTerms, flags, positions, documentSpans[i]);
        public void close() {
            if (closed.compareAndSet(false, true)) {
                arena.close();
            }
        }

                    if (score != null) {
                        score.debugRankingFactors = rankingFactors;
                        results.add(score);
                    }
                }
            }

    public List<SearchResultItem> rankResults(
            IndexSearchBudget budget,
            ResultRankingContext rankingContext,
            RankingData rankingData,
            boolean exportDebugData)
    {
        IndexResultScoreCalculator resultRanker = new IndexResultScoreCalculator(statefulIndex, domainRankingOverrides, rankingContext);

        List<SearchResultItem> results = new ArrayList<>(rankingData.size());

        // Iterate over documents by their index in the combinedDocIds, as we need the index for the
        // term data arrays as well

        var searchTerms = rankingData.searchTerms;

        while (rankingData.next() && budget.hasTimeLeft()) {

            // Ignore documents that don't match the mandatory constraints
            if (!searchTerms.phraseConstraints.testMandatory(rankingData.positions())) {
                continue;
            }

        return results;
            if (!exportDebugData) {
                var score = resultRanker.calculateScore(null, rankingData.resultId(), searchTerms, rankingData.flags(), rankingData.positions(), rankingData.documentSpans());
                if (score != null) {
                    results.add(score);
                }
            }
            else {
                var rankingFactors = new DebugRankingFactors();
                var score = resultRanker.calculateScore( rankingFactors, rankingData.resultId(), searchTerms, rankingData.flags(), rankingData.positions(), rankingData.documentSpans());

                if (score != null) {
                    score.debugRankingFactors = rankingFactors;
                    results.add(score);
                }
            }
        }

        return results;
    }


    public List<RpcDecoratedResultItem> selectBestResults(int limitByDomain,
                                                          int limitTotal,
                                                          ResultRankingContext resultRankingContext,
                                                          ResultPriorityQueue results) throws SQLException {
                                                          List<SearchResultItem> results) throws SQLException {

        var domainCountFilter = new IndexResultDomainDeduplicator(limitByDomain);

@@ -174,11 +220,18 @@ public class IndexResultRankingService {

                resultsList.clear();
                IndexSearchBudget budget = new IndexSearchBudget(10000);
                resultsList.addAll(this.rankResults(
                        resultRankingContext,
                        budget, new CombinedDocIdList(combinedIdsList),
                        true)
                );
                try (var data = prepareRankingData(resultRankingContext, new CombinedDocIdList(combinedIdsList), null)) {
                    resultsList.addAll(this.rankResults(
                            budget,
                            resultRankingContext,
                            data,
                            true)
                    );
                }
                catch (TimeoutException ex) {
                    // this won't happen since we passed null for budget
                }

            }

        // Fetch the document details for the selected results in one go, from the local document database
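Because a RankingData instance can be closed both by the evaluation thread (via try-with-resources) and again by the leftover-buffer drain in run(), close() guards the shared arena with an AtomicBoolean so the second call is a no-op. The pattern in isolation:

    // Sketch: idempotent AutoCloseable over a shared arena.
    private final AtomicBoolean closed = new AtomicBoolean(false);

    public void close() {
        if (closed.compareAndSet(false, true)) {  // only the first caller wins
            arena.close();                        // closing a shared Arena twice would throw
        }
    }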
@@ -2,7 +2,6 @@ package nu.marginalia.index.results.model.ids;

import it.unimi.dsi.fastutil.longs.LongArrayList;
import nu.marginalia.array.page.LongQueryBuffer;
import org.roaringbitmap.longlong.Roaring64Bitmap;

import java.util.Arrays;
import java.util.stream.LongStream;
@@ -24,13 +23,17 @@ public final class CombinedDocIdList {
    public CombinedDocIdList(LongArrayList data) {
        this.data = data.toLongArray();
    }
    public CombinedDocIdList(Roaring64Bitmap data) {
        this.data = data.toArray();
    }
    public CombinedDocIdList() {
        this.data = new long[0];
    }

    public static CombinedDocIdList combineLists(CombinedDocIdList one, CombinedDocIdList other) {
        long[] data = new long[one.size() + other.size()];
        System.arraycopy(one.data, 0, data, 0, one.data.length);
        System.arraycopy(other.data, 0, data, one.data.length, other.data.length);
        return new CombinedDocIdList(data);
    }

    public int size() {
        return data.length;
    }
@@ -6,7 +6,7 @@ import java.util.Arrays;
import java.util.stream.LongStream;

public final class TermIdList {
    private final long[] array;
    public final long[] array;

    public TermIdList(long[] array) {
        this.array = array;
@@ -9,4 +9,8 @@ public interface SearchSet {
     */
    boolean contains(int domainId);

    default boolean imposesConstraint() {
        return true;
    }

}

@@ -10,4 +10,9 @@ public class SearchSetAny implements SearchSet {
    public String toString() {
        return getClass().getSimpleName();
    }

    @Override
    public boolean imposesConstraint() {
        return false;
    }
}
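The default method gives the common case a cheap opt-out: SearchSetAny overrides imposesConstraint() to return false, and ParamMatchingQueryFilter.apply() (earlier in this diff) returns immediately when neither the metadata nor the search set constrains anything, skipping the per-document test loop entirely. The shape of the pattern:

    // Sketch: an interface default that "match everything" implementations override.
    interface SearchSet {
        boolean contains(int domainId);
        default boolean imposesConstraint() { return true; }  // conservative default
    }
    // caller: if (!set.imposesConstraint()) return;  // skip filtering entirely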
@@ -1,7 +1,7 @@
package nu.marginalia.index.query;

import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.index.query.filter.QueryFilterStepIf;

import java.util.ArrayList;
import java.util.List;
@@ -18,15 +18,16 @@ import java.util.stream.Collectors;
public class IndexQuery {
    private final List<EntrySource> sources;
    private final List<QueryFilterStepIf> inclusionFilter = new ArrayList<>(10);
    private boolean prioritize = false;

    public IndexQuery(List<EntrySource> sources)
    public IndexQuery(EntrySource source, boolean prioritize)
    {
        this.sources = sources;
        this.sources = List.of(source);
        this.prioritize = prioritize;
    }

    public IndexQuery(EntrySource... sources)
    {
        this.sources = List.of(sources);
    public boolean isPrioritized() {
        return prioritize;
    }
    /** Adds a filter to the query. The filter will be applied to the results
     * after they are read from the sources.
@@ -60,6 +61,7 @@ public class IndexQuery {
        if (!fillBuffer(dest))
            return;


        for (var filter : inclusionFilter) {
            filter.apply(dest);

@@ -73,6 +75,8 @@ public class IndexQuery {

    private boolean fillBuffer(LongQueryBuffer dest) {
        for (;;) {
            dest.zero();

            EntrySource source = sources.get(si);
            source.read(dest);

@@ -102,6 +106,7 @@ public class IndexQuery {

        return sb.toString();
    }

}

@@ -2,8 +2,6 @@ package nu.marginalia.index.query;

import nu.marginalia.index.query.filter.QueryFilterStepIf;

import java.util.List;

/** Builds a query.
 * <p />
 * Note: The query builder may omit predicates that are deemed redundant.
@@ -11,14 +9,13 @@ import java.util.List;
public interface IndexQueryBuilder {
    /** Filters documents that also contain termId, within the full index.
     */
    IndexQueryBuilder also(long termId);
    IndexQueryBuilder also(long termId, IndexSearchBudget budget);

    /** Excludes documents that contain termId, within the full index
     */
    IndexQueryBuilder not(long termId);
    IndexQueryBuilder not(long termId, IndexSearchBudget budget);

    IndexQueryBuilder addInclusionFilter(QueryFilterStepIf filterStep);
    IndexQueryBuilder addInclusionFilterAny(List<QueryFilterStepIf> filterStep);

    IndexQuery build();
}
@@ -1,71 +0,0 @@
package nu.marginalia.index.query.filter;

import nu.marginalia.array.page.LongQueryBuffer;

import java.util.ArrayList;
import java.util.List;
import java.util.StringJoiner;

public class QueryFilterAllOf implements QueryFilterStepIf {
    private final List<QueryFilterStepIf> steps;

    public QueryFilterAllOf(List<? extends QueryFilterStepIf> steps) {
        this.steps = new ArrayList<>(steps.size());

        for (var step : steps) {
            if (step instanceof QueryFilterAllOf allOf) {
                this.steps.addAll(allOf.steps);
            }
            else {
                this.steps.add(step);
            }
        }
    }

    public QueryFilterAllOf(QueryFilterStepIf... steps) {
        this(List.of(steps));
    }

    public double cost() {
        double prod = 1.;

        for (var step : steps) {
            double cost = step.cost();
            if (cost > 1.0) {
                prod *= Math.log(cost);
            }
            else {
                prod += cost;
            }
        }

        return prod;
    }

    @Override
    public boolean test(long value) {
        for (var step : steps) {
            if (!step.test(value))
                return false;
        }
        return true;
    }


    public void apply(LongQueryBuffer buffer) {
        if (steps.isEmpty())
            return;

        for (var step : steps) {
            step.apply(buffer);
        }
    }

    public String describe() {
        StringJoiner sj = new StringJoiner(",", "[All Of: ", "]");
        for (var step : steps) {
            sj.add(step.describe());
        }
        return sj.toString();
    }
}
@@ -1,86 +0,0 @@
package nu.marginalia.index.query.filter;

import nu.marginalia.array.page.LongQueryBuffer;

import java.util.ArrayList;
import java.util.List;
import java.util.StringJoiner;

public class QueryFilterAnyOf implements QueryFilterStepIf {
    private final List<QueryFilterStepIf> steps;

    public QueryFilterAnyOf(List<? extends QueryFilterStepIf> steps) {
        this.steps = new ArrayList<>(steps.size());

        for (var step : steps) {
            if (step instanceof QueryFilterAnyOf anyOf) {
                this.steps.addAll(anyOf.steps);
            } else {
                this.steps.add(step);
            }
        }
    }

    public QueryFilterAnyOf(QueryFilterStepIf... steps) {
        this(List.of(steps));
    }

    public double cost() {
        return steps.stream().mapToDouble(QueryFilterStepIf::cost).sum();
    }

    @Override
    public boolean test(long value) {
        for (var step : steps) {
            if (step.test(value))
                return true;
        }
        return false;
    }


    public void apply(LongQueryBuffer buffer) {
        if (steps.isEmpty())
            return;

        if (steps.size() == 1) {
            steps.getFirst().apply(buffer);
            return;
        }

        int start = 0;
        final int endOfValidData = buffer.end; // End of valid data range

        // The filters act as a partitioning function, where anything before buffer.end
        // is "in", and is guaranteed to be sorted; and anything after buffer.end is "out",
        // but no sorting guarantee is provided.

        // To provide a conditional filter, we re-sort the "out" range, slice it and apply filtering to the slice

        for (var step : steps)
        {
            var slice = buffer.slice(start, endOfValidData);
            slice.data.sort(0, slice.size());

            step.apply(slice);
            start += slice.end;
        }

        // After we're done, read and write pointers should be 0 and "end" should be the length of valid data,
        // normally done through buffer.finalizeFiltering(); but that won't work here
        buffer.reset();
        buffer.end = start;

        // After all filters have been applied, we must re-sort all the retained data
        // to uphold the sortedness contract
        buffer.data.sort(0, buffer.end);
    }

    public String describe() {
        StringJoiner sj = new StringJoiner(",", "[Any Of: ", "]");
        for (var step : steps) {
            sj.add(step.describe());
        }
        return sj.toString();
    }
}
@@ -4,11 +4,6 @@ import nu.marginalia.array.page.LongQueryBuffer;

public class QueryFilterLetThrough implements QueryFilterStepIf {

    @Override
    public boolean test(long value) {
        return true;
    }

    @Override
    public void apply(LongQueryBuffer buffer) {
        buffer.retainAll();

@@ -5,11 +5,6 @@ import nu.marginalia.array.page.LongQueryBuffer;
public class QueryFilterNoPass implements QueryFilterStepIf {
    static final QueryFilterStepIf instance = new QueryFilterNoPass();

    @Override
    public boolean test(long value) {
        return false;
    }

    public void apply(LongQueryBuffer buffer) {
        buffer.finalizeFiltering();
    }
@@ -1,27 +0,0 @@
package nu.marginalia.index.query.filter;

import java.util.function.LongPredicate;

public class QueryFilterStepExcludeFromPredicate implements QueryFilterStepIf {
    private final LongPredicate pred;

    public QueryFilterStepExcludeFromPredicate(LongPredicate pred) {
        this.pred = pred;
    }

    @Override
    public boolean test(long value) {
        return !pred.test(value);
    }

    @Override
    public double cost() {
        return 1;
    }

    @Override
    public String describe() {
        return "[!Predicate]";
    }

}
@@ -1,27 +0,0 @@
package nu.marginalia.index.query.filter;

import java.util.function.LongPredicate;

public class QueryFilterStepFromPredicate implements QueryFilterStepIf {
    private final LongPredicate pred;

    public QueryFilterStepFromPredicate(LongPredicate pred) {
        this.pred = pred;
    }

    @Override
    public boolean test(long value) {
        return pred.test(value);
    }

    @Override
    public double cost() {
        return 1;
    }

    @Override
    public String describe() {
        return "[Predicate]";
    }

}
@@ -3,8 +3,6 @@ package nu.marginalia.index.query.filter;
import nu.marginalia.array.page.LongQueryBuffer;

public interface QueryFilterStepIf extends Comparable<QueryFilterStepIf> {
    boolean test(long value);

    double cost();

    default int compareTo(QueryFilterStepIf other) {
@@ -22,17 +20,7 @@ public interface QueryFilterStepIf extends Comparable<QueryFilterStepIf> {
     *
     * <p>ASSUMPTION: buffer is sorted up until end.</p>
     */
    default void apply(LongQueryBuffer buffer) {
        while (buffer.hasMore()) {
            if (test(buffer.currentValue())) {
                buffer.retainAndAdvance();
            }
            else {
                buffer.rejectAndAdvance();
            }
        }
        buffer.finalizeFiltering();
    }
    void apply(LongQueryBuffer buffer);

}

@@ -1,93 +0,0 @@
package nu.marginalia.index.query.filter;

import nu.marginalia.array.page.LongQueryBuffer;
import org.junit.jupiter.api.Test;

import java.util.List;

import static org.junit.jupiter.api.Assertions.*;

class QueryFilterStepIfTest {

    private LongQueryBuffer createBuffer(long... data) {
        return new LongQueryBuffer(data, data.length);
    }

    @Test
    public void testPassThrough() {
        var buffer = createBuffer(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        var filter = new QueryFilterLetThrough();
        filter.apply(buffer);
        assertArrayEquals(new long[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, buffer.copyData());
    }

    @Test
    public void testNoPass() {
        var buffer = createBuffer(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        var filter = new QueryFilterNoPass();
        filter.apply(buffer);
        assertArrayEquals(new long[]{}, buffer.copyData());
    }

    @Test
    public void testIncludePredicate() {
        var buffer = createBuffer(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        var filter = new QueryFilterStepFromPredicate(value -> value % 2 == 0);
        filter.apply(buffer);
        assertArrayEquals(new long[]{2, 4, 6, 8, 10}, buffer.copyData());
    }

    @Test
    public void testExcludePredicate() {
        var buffer = createBuffer(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        var filter = new QueryFilterStepExcludeFromPredicate(value -> value % 2 == 1);
        filter.apply(buffer);
        assertArrayEquals(new long[]{2, 4, 6, 8, 10}, buffer.copyData());
    }

    @Test
    public void testSuccessiveApplication() {
        var buffer = createBuffer(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        var filter1 = new QueryFilterStepFromPredicate(value -> value % 2 == 0);
        var filter2 = new QueryFilterStepExcludeFromPredicate(value -> value <= 6);
        filter1.apply(buffer);
        filter2.apply(buffer);
        assertArrayEquals(new long[]{8, 10}, buffer.copyData());
    }

    @Test
    public void testSuccessiveApplicationWithAllOf() {
        var buffer = createBuffer(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        var filter1 = new QueryFilterStepFromPredicate(value -> value % 2 == 0);
        var filter2 = new QueryFilterStepExcludeFromPredicate(value -> value <= 6);
        new QueryFilterAllOf(List.of(filter1, filter2)).apply(buffer);
        assertArrayEquals(new long[]{8, 10}, buffer.copyData());
    }
    @Test
    public void testCombinedOrAnd() {
        var buffer = createBuffer(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);

        var filter1 = new QueryFilterStepFromPredicate(value -> value % 2 == 0);
        var filter2 = new QueryFilterStepFromPredicate(value -> value <= 5);
        var filter1_2 = new QueryFilterAllOf(List.of(filter1, filter2));

        var filter3 = new QueryFilterStepFromPredicate(value -> value % 2 == 1);
        var filter4 = new QueryFilterStepFromPredicate(value -> value > 5);
        var filter3_4 = new QueryFilterAllOf(List.of(filter3, filter4));

        var filter12_34 = new QueryFilterAnyOf(List.of(filter1_2, filter3_4));

        filter12_34.apply(buffer);

        assertArrayEquals(new long[]{2, 4, 7, 9}, buffer.copyData());
    }
    @Test
    public void testCombinedApplication() {
        var buffer = createBuffer(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        var filter1 = new QueryFilterStepFromPredicate(value -> value % 3 == 0);
        var filter2 = new QueryFilterStepFromPredicate(value -> value % 5 == 0);
        var filter = new QueryFilterAnyOf(List.of(filter1, filter2));
        filter.apply(buffer);
        assertArrayEquals(new long[]{3, 5, 6, 9, 10}, buffer.copyData());
    }
}
@@ -16,6 +16,7 @@ import nu.marginalia.index.index.StatefulIndex;
 import nu.marginalia.index.journal.IndexJournal;
 import nu.marginalia.index.journal.IndexJournalSlopWriter;
 import nu.marginalia.index.positions.TermData;
+import nu.marginalia.index.query.IndexSearchBudget;
 import nu.marginalia.index.results.model.ids.CombinedDocIdList;
 import nu.marginalia.linkdb.docs.DocumentDbReader;
 import nu.marginalia.linkdb.docs.DocumentDbWriter;
@@ -156,7 +157,7 @@ public class CombinedIndexReaderTest {
         var reader = indexFactory.getCombinedIndexReader();
         var query = reader
                 .findFullWord(kw("hello"))
-                .also(kw("world"))
+                .also(kw("world"), new IndexSearchBudget(10_000))
                 .build();

         var buffer = new LongQueryBuffer(32);
@@ -198,8 +199,8 @@ public class CombinedIndexReaderTest {

         var reader = indexFactory.getCombinedIndexReader();
         var query = reader.findFullWord(kw("hello"))
-                .also(kw("world"))
-                .not(kw("goodbye"))
+                .also(kw("world"), new IndexSearchBudget(10_000))
+                .not(kw("goodbye"), new IndexSearchBudget(10_000))
                 .build();

         var buffer = new LongQueryBuffer(32);
@@ -255,18 +256,19 @@ public class CombinedIndexReaderTest {
         Path outputFileDocs = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.DOCS, ReverseIndexFullFileNames.FileVersion.NEXT);
         Path outputFileWords = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.WORDS, ReverseIndexFullFileNames.FileVersion.NEXT);
         Path outputFilePositions = ReverseIndexFullFileNames.resolve(IndexLocations.getCurrentIndex(fileStorageService), ReverseIndexFullFileNames.FileIdentifier.POSITIONS, ReverseIndexFullFileNames.FileVersion.NEXT);

         Path workDir = IndexLocations.getIndexConstructionArea(fileStorageService);
         Path tmpDir = workDir.resolve("tmp");

         if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir);

-        var constructor = new FullIndexConstructor(
-                outputFileDocs,
-                outputFileWords,
-                outputFilePositions,
-                DocIdRewriter.identity(),
-                tmpDir);
+        var constructor =
+                new FullIndexConstructor(
+                        outputFileDocs,
+                        outputFileWords,
+                        outputFilePositions,
+                        DocIdRewriter.identity(),
+                        tmpDir);
         constructor.createReverseIndex(new FakeProcessHeartbeat(), "name", workDir);
     }
@@ -411,8 +411,6 @@ public class IndexQueryServiceIntegrationSmokeTest {
                 .filter(v -> (id % v) == 0)
                 .toArray();

-        System.out.println("id:" + id + " factors: " + Arrays.toString(factors));
-
         long fullId = fullId(id);

         ldbw.add(new DocdbUrlDetail(
@@ -21,7 +21,7 @@ dependencies {
     implementation libs.lz4
     implementation libs.guava

-    implementation project(':code:libraries:array:cpp')
+    implementation project(':code:libraries:native')

     testImplementation libs.bundles.slf4j.test
     testImplementation libs.bundles.junit
@@ -1,26 +0,0 @@
plugins {
    id 'java'
}

java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}

dependencies {
    implementation libs.bundles.slf4j
}

apply from: "$rootProject.projectDir/srcsets.gradle"

// We use a custom task to compile the C++ code into a shared library
// with a shellscript as gradle's c++ tasks are kind of insufferable

tasks.register('compileCpp', Exec) {
    inputs.files('compile.sh', 'src/main/cpp/cpphelpers.cpp', 'src/main/public/cpphelpers.h')
    outputs.file 'resources/libcpp.so'
    commandLine 'sh', 'compile.sh'
}

processResources.dependsOn('compileCpp')
@@ -1,10 +0,0 @@
#!/usr/bin/env sh

CXX=${CXX:-g++}

if ! which ${CXX} > /dev/null; then
    echo "g++ not found, skipping compilation"
    exit 0
fi

${CXX} -O3 -march=native -std=c++14 -shared -Isrc/main/public src/main/cpp/*.cpp -o resources/libcpp.so
@@ -1,8 +0,0 @@
#include <stdint.h>

#pragma once

extern "C" {
    void ms_sort_64(int64_t* area, uint64_t start, uint64_t end);
    void ms_sort_128(int64_t* area, uint64_t start, uint64_t end);
}
@@ -0,0 +1,55 @@
package nu.marginalia.array;

import nu.marginalia.ffi.LinuxSystemCalls;

import java.io.IOException;
import java.lang.foreign.MemorySegment;
import java.nio.file.Path;

public class DirectFileReader implements AutoCloseable {
    int fd;

    public DirectFileReader(Path filename) throws IOException {
        fd = LinuxSystemCalls.openDirect(filename);
        if (fd < 0) {
            throw new IOException("Error opening direct file: " + filename);
        }
    }

    public void readAligned(LongArray dest, long offset) throws IOException {
        readAligned(dest.getMemorySegment(), offset);
    }

    public void readAligned(MemorySegment segment, long offset) throws IOException {
        if (LinuxSystemCalls.readAt(fd, segment, offset) != segment.byteSize()) {
            throw new IOException("Failed to read data at " + offset);
        }
    }

    public void readUnaligned(MemorySegment dest, MemorySegment alignedBuffer, long fileOffset) throws IOException {
        int destOffset = 0;

        for (long totalBytesToCopy = dest.byteSize(); totalBytesToCopy > 0; ) {
            long alignedPageAddress = fileOffset & -4096L;
            long srcPageOffset = fileOffset & 4095L;
            long srcPageEnd = Math.min(srcPageOffset + totalBytesToCopy, 4096);

            // wrapper for O_DIRECT pread
            if (LinuxSystemCalls.readAt(fd, alignedBuffer, alignedPageAddress) != alignedBuffer.byteSize()) {
                throw new IOException("Failed to read data at " + alignedPageAddress + " of size " + dest.byteSize());
            }

            int bytesToCopy = (int) (srcPageEnd - srcPageOffset);

            MemorySegment.copy(alignedBuffer, srcPageOffset, dest, destOffset, bytesToCopy);

            destOffset += bytesToCopy;
            fileOffset += bytesToCopy;
            totalBytesToCopy -= bytesToCopy;
        }
    }

    public void close() {
        LinuxSystemCalls.closeFd(fd);
    }
}
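The readUnaligned loop above hinges on two bit-masks: fileOffset & -4096L rounds the file offset down to the containing 4 KiB page (the unit an O_DIRECT read must start on), while fileOffset & 4095L is where the wanted bytes begin inside the bounce buffer. A worked example with assumed concrete numbers, not taken from the code:

    // Hypothetical request: 600 bytes starting at file offset 10_000.
    long fileOffset = 10_000;
    long alignedPageAddress = fileOffset & -4096L;                 // 8192: page-aligned pread position
    long srcPageOffset      = fileOffset & 4095L;                  // 1808: start inside the bounce buffer
    long srcPageEnd         = Math.min(srcPageOffset + 600, 4096); // 2408: this request fits in one page
    // One aligned read fills the bounce buffer with file bytes [8192, 12288);
    // buffer bytes [1808, 2408) are then copied out, satisfying the request.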
@@ -5,6 +5,7 @@ import nu.marginalia.array.page.UnsafeLongArray;

 import java.io.IOException;
 import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -25,6 +26,13 @@ public class LongArrayFactory {
         return SegmentLongArray.onHeap(Arena.ofShared(), size);
     }

+    public static LongArray onHeapManaged(Arena arena, long size) {
+        if (useUnsafe)
+            return UnsafeLongArray.wrap(arena.allocate(8 * size));
+        else
+            return SegmentLongArray.wrap(arena.allocate(8 * size));
+    }
+
     public static LongArray mmapForReadingConfined(Path filename) throws IOException {
         if (useUnsafe)
             return UnsafeLongArray.fromMmapReadOnly(Arena.ofConfined(), filename, 0, Files.size(filename) / 8);
@@ -66,4 +74,13 @@ public class LongArrayFactory {
         else
             return SegmentLongArray.fromMmapReadWrite(Arena.ofShared(), filename, 0, size);
     }

+    public static LongArray wrap(MemorySegment ms) {
+        if (useUnsafe) {
+            return UnsafeLongArray.wrap(ms);
+        }
+        else {
+            return SegmentLongArray.wrap(ms);
+        }
+    }
 }
@@ -20,6 +20,27 @@ public interface LongArraySearch extends LongArrayBase {
         return fromIndex + low;
     }

+    default long binarySearch2(long key, long fromIndex, long toIndex) {
+        long low = 0;
+        long len = toIndex - fromIndex;
+
+        while (len > 0) {
+            var half = len / 2;
+            long val = get(fromIndex + low + half);
+            if (val < key) {
+                low += len - half;
+            }
+            else if (val == key) {
+                low += half;
+                break;
+            }
+            len = half;
+        }
+
+        return fromIndex + low;
+    }
+
+
     default long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
         long low = 0;
         long high = (toIndex - fromIndex)/sz - 1;
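binarySearch2 above is a branch-reduced lower-bound search: the window length halves every iteration and low only advances past values smaller than the key, so it returns the index of a matching element when one exists (some occurrence, thanks to the early exit), else the insertion point, which is toIndex when every element is smaller. A standalone mirror over a plain long[], for illustration only:

    // Standalone mirror of binarySearch2 over a long[]; not part of the library.
    static long lowerBound(long[] a, long key, long from, long to) {
        long low = 0, len = to - from;
        while (len > 0) {
            long half = len / 2;
            long val = a[(int) (from + low + half)];
            if (val < key) low += len - half;            // advance past the smaller half
            else if (val == key) { low += half; break; } // early exit on an exact hit
            len = half;                                  // keep narrowing the window
        }
        return from + low;
    }

    // lowerBound(new long[]{1, 3, 5, 7}, 5, 0, 4) == 2   (exact hit)
    // lowerBound(new long[]{1, 3, 5, 7}, 4, 0, 4) == 2   (insertion point)
    // lowerBound(new long[]{1, 3, 5, 7}, 9, 0, 4) == 4   (== toIndex, not found)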
@@ -33,6 +54,7 @@ public interface LongArraySearch extends LongArrayBase {
            len = half;
        }


        return fromIndex + sz * low;
    }
@@ -1,7 +1,7 @@
 package nu.marginalia.array.algo;

-import nu.marginalia.NativeAlgos;
 import nu.marginalia.array.LongArray;
+import nu.marginalia.ffi.NativeAlgos;

 import java.io.IOException;
 import java.nio.channels.FileChannel;
@@ -3,6 +3,8 @@ package nu.marginalia.array.page;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.LongArrayFactory;

+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
@@ -36,13 +38,12 @@ public class LongQueryBuffer {

     public LongQueryBuffer(int size) {
         this.data = LongArrayFactory.onHeapConfined(size);
-        this.end = size;
+        this.end = 0;
     }

     public LongQueryBuffer(long[] data, int size) {
         this.data = LongArrayFactory.onHeapConfined(size);
         this.data.set(0, data);

         this.end = size;
     }
@@ -52,6 +53,26 @@ public class LongQueryBuffer {
         return copy;
     }

+    public long[] copyFilterData() {
+        long[] copy = new long[write];
+        data.forEach(0, write, (pos, val) -> copy[(int)pos]=val );
+        return copy;
+    }
+
+    public boolean fitsMore() {
+        return end < data.size();
+    }
+
+    public int addData(MemorySegment source, long sourceOffset, int nMax) {
+        int n = Math.min(nMax, (int) data.size() - end);
+
+        MemorySegment.copy(source, ValueLayout.JAVA_LONG, sourceOffset, data.getMemorySegment(), ValueLayout.JAVA_LONG, 8L * end, n);
+
+        end += n;
+
+        return n;
+    }
+
     /** Dispose of the buffer and release resources */
     public void dispose() {
         data.close();
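The new addData/fitsMore pair turns the buffer into a fill target for streamed reads: addData copies up to nMax longs out of the source segment (sourceOffset is a byte offset, per MemorySegment.copy) and advances end. A hypothetical fill loop, where segment stands in for whatever source is being drained and is assumed to hold a whole number of longs:

    LongQueryBuffer buffer = new LongQueryBuffer(1024);  // note: now starts empty, end == 0
    long byteOffset = 0;
    while (buffer.fitsMore() && byteOffset < segment.byteSize()) {
        // cap the chunk so we never read past the end of the source segment
        int want = (int) Math.min(128, (segment.byteSize() - byteOffset) / 8);
        int copied = buffer.addData(segment, byteOffset, want);
        byteOffset += 8L * copied;
    }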
@@ -29,6 +29,10 @@ public class SegmentLongArray implements LongArray {
         this.arena = arena;
     }

+    public static SegmentLongArray wrap(MemorySegment segment) {
+        return new SegmentLongArray(segment, null);
+    }
+
     public static SegmentLongArray onHeap(Arena arena, long size) {
         return new SegmentLongArray(arena.allocate(WORD_SIZE*size, 16), arena);
     }
@@ -50,6 +50,10 @@ public class UnsafeLongArray implements LongArray {
         this.channel = channel;
     }

+    public static UnsafeLongArray wrap(MemorySegment ms) {
+        return new UnsafeLongArray(ms, null);
+    }
+
     public static UnsafeLongArray onHeap(Arena arena, long size) {
         return new UnsafeLongArray(arena.allocate(WORD_SIZE*size, 16), arena);
     }
@@ -77,6 +81,10 @@ public class UnsafeLongArray implements LongArray {

     @Override
     public LongArray range(long start, long end) {

+        assert end >= start : end + "<" + start;
+        assert end <= size() : end + "<" + size();
+
         return new UnsafeLongArray(
                 segment.asSlice(
                         start * JAVA_LONG.byteSize(),
@@ -93,6 +101,7 @@ public class UnsafeLongArray implements LongArray {

     @Override
     public long get(long at) {
+
         try {
             return unsafe.getLong(segment.address() + at * JAVA_LONG.byteSize());
         }
@@ -120,6 +129,7 @@ public class UnsafeLongArray implements LongArray {

     @Override
     public void set(long start, long end, LongBuffer buffer, int bufferStart) {
+        System.out.println("setA@"+ start + "#" + hashCode() + "-" + Thread.currentThread().threadId());
         for (int i = 0; i < end - start; i++) {
             unsafe.putLong(segment.address() + (start + i) * JAVA_LONG.byteSize(), buffer.get(bufferStart + i));
         }
@@ -0,0 +1,6 @@
package nu.marginalia.array.pool;

public enum BufferEvictionPolicy {
    READ_ONCE,
    CACHE
}
@@ -0,0 +1,220 @@
package nu.marginalia.array.pool;

import nu.marginalia.ffi.LinuxSystemCalls;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

public class BufferPool implements AutoCloseable {
    private static final Logger logger = LoggerFactory.getLogger(BufferPool.class);

    private final MemoryPage[] pages;

    private final long fileSize;
    private final Arena arena;
    private final int fd;
    private final int pageSizeBytes;
    private PoolLru poolLru;

    private final AtomicInteger diskReadCount = new AtomicInteger();
    private final AtomicInteger cacheReadCount = new AtomicInteger();

    private volatile boolean running = true;

    /** Unassociate all buffers with their addresses, ensuring they will not be cacheable */
    public synchronized void reset() {
        for (var page : pages) {
            page.pageAddress(-1);
        }
        try {
            poolLru.stop();
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        poolLru = new PoolLru(pages);
    }

    public BufferPool(Path filename, int pageSizeBytes, int poolSize) {
        this.fd = LinuxSystemCalls.openDirect(filename);
        this.pageSizeBytes = pageSizeBytes;
        try {
            this.fileSize = Files.size(filename);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        this.arena = Arena.ofShared();
        this.pages = new UnsafeMemoryPage[poolSize];

        MemorySegment memoryArea = arena.allocate((long) pageSizeBytes*poolSize, 4096);
        for (int i = 0; i < pages.length; i++) {
            if (Boolean.getBoolean("system.noSunMiscUnsafe")) {
                pages[i] = (MemoryPage) new SegmentMemoryPage(memoryArea.asSlice((long) i*pageSizeBytes, pageSizeBytes), i);
            }
            else {
                pages[i] = (MemoryPage) new UnsafeMemoryPage(memoryArea.asSlice((long) i*pageSizeBytes, pageSizeBytes), i);
            }
        }

        this.poolLru = new PoolLru(pages);

        Thread.ofPlatform().start(() -> {
            int diskReadOld = 0;
            int cacheReadOld = 0;

            while (running) {
                try {
                    TimeUnit.SECONDS.sleep(30);
                } catch (InterruptedException e) {
                    logger.info("Sleep interrupted", e);
                    break;
                }

                int diskRead = diskReadCount.get();
                int cacheRead = cacheReadCount.get();
                int heldCount = 0;
                for (var page : pages) {
                    if (page.isHeld()) {
                        heldCount++;
                    }
                }

                if (diskRead != diskReadOld || cacheRead != cacheReadOld) {
                    logger.info("[#{}:{}] Disk/Cached: {}/{}, heldCount={}/{}, fqs={}, rcc={}", hashCode(), pageSizeBytes, diskRead, cacheRead, heldCount, pages.length, poolLru.getFreeQueueSize(), poolLru.getReclaimCycles());
                }
            }
        });
    }

    public void close() {
        running = false;

        try {
            poolLru.stop();
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        LinuxSystemCalls.closeFd(fd);
        arena.close();

        System.out.println("Disk read count: " + diskReadCount.get());
        System.out.println("Cached read count: " + cacheReadCount.get());
    }

    @Nullable
    public MemoryPage getExistingBufferForReading(long address) {
        MemoryPage cachedBuffer = poolLru.get(address);
        if (cachedBuffer != null && cachedBuffer.pageAddress() == address) {

            // Try to acquire the page normally
            if (cachedBuffer.acquireAsReader(address)) {
                cacheReadCount.incrementAndGet();

                return cachedBuffer;
            }

            if (cachedBuffer.pageAddress() != address)
                return null;

            // The page we are looking for is currently being written
            waitForPageWrite(cachedBuffer);

            if (cachedBuffer.acquireAsReader(address)) {
                this.cacheReadCount.incrementAndGet();
                return cachedBuffer;
            }
        }

        return null;
    }

    public MemoryPage get(long address) {
        // Look through available pages for the one we're looking for
        MemoryPage buffer = getExistingBufferForReading(address);

        if (buffer == null) {
            buffer = read(address, true);
        }

        return buffer;
    }

    private MemoryPage read(long address, boolean acquire) {
        // If the page is not available, read it from the caller's thread
        if (address + pageSizeBytes > fileSize) {
            throw new RuntimeException("Address " + address + " too large for page size " + pageSizeBytes + " and file size " + fileSize);
        }
        if ((address & 511) != 0) {
            throw new RuntimeException("Address " + address + " not aligned");
        }
        MemoryPage buffer = acquireFreePage(address);
        poolLru.register(buffer);
        populateBuffer(buffer);

        if (acquire) {
            if (!buffer.pinCount().compareAndSet(-1, 1)) {
                throw new IllegalStateException("Panic! Write lock was not held during write!");
            }
        }
        else {
            if (!buffer.pinCount().compareAndSet(-1, 0)) {
                throw new IllegalStateException("Panic! Write lock was not held during write!");
            }
        }

        diskReadCount.incrementAndGet();

        return buffer;
    }

    private MemoryPage acquireFreePage(long address) {
        for (;;) {
            var free = poolLru.getFree();
            if (free != null && free.acquireForWriting(address)) {
                return free;
            }
        }
    }

    private void populateBuffer(MemoryPage buffer) {
        if (getClass().desiredAssertionStatus()) {
            buffer.getMemorySegment().set(ValueLayout.JAVA_INT, 0, 9999);
        }
        LinuxSystemCalls.readAt(fd, buffer.getMemorySegment(), buffer.pageAddress());
        assert buffer.getMemorySegment().get(ValueLayout.JAVA_INT, 0) != 9999;
        buffer.dirty(false);

        if (buffer.pinCount().get() > 1) {
            synchronized (buffer) {
                buffer.notifyAll();
            }
        }
    }

    private void waitForPageWrite(MemoryPage page) {
        if (!page.dirty()) {
            return;
        }

        synchronized (page) {
            while (page.dirty()) {
                try {
                    page.wait(0, 1000);
                }
                catch (InterruptedException ex) {
                    throw new RuntimeException(ex);
                }
            }
        }
    }

}
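A minimal usage sketch of the pool's read path, under assumed sizes (64 KiB pages, a 256-slot pool) and a hypothetical file name; get() hands back a pinned page, and closing it merely unpins it rather than freeing memory:

    // Hypothetical: /tmp/index.dat, 65536-byte pages, 256 pages resident.
    try (BufferPool pool = new BufferPool(Path.of("/tmp/index.dat"), 65536, 256)) {
        try (MemoryPage page = pool.get(0L)) {   // address must be 512-byte aligned
            long firstWord = page.getLong(0);    // read while pinned
        }                                        // close() unpins; the page stays cached
    }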
@@ -0,0 +1,32 @@
package nu.marginalia.array.pool;

import java.lang.foreign.MemorySegment;
import java.util.concurrent.atomic.AtomicInteger;

public interface MemoryPage extends AutoCloseable {
    boolean isHeld();

    MemorySegment getMemorySegment();

    byte getByte(int offset);
    int getInt(int offset);
    long getLong(int offset);

    int binarySearchLong(long key, int baseOffset, int fromIndex, int toIndex);
    boolean acquireForWriting(long intendedAddress);
    boolean acquireAsReader(long expectedAddress);

    AtomicInteger pinCount();

    void increaseClock(int val);
    void touchClock(int val);
    boolean decreaseClock();

    long pageAddress();
    void pageAddress(long address);

    boolean dirty();
    void dirty(boolean val);

    void close();
}
code/libraries/array/java/nu/marginalia/array/pool/PoolLru.java (new file, 186 lines)
@@ -0,0 +1,186 @@
package nu.marginalia.array.pool;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.LinkedHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.LockSupport;
import java.util.concurrent.locks.StampedLock;

/** LRU for pool buffers
 * */
public class PoolLru {
    private static final Logger logger = LoggerFactory.getLogger(PoolLru.class);

    private final int maxSize;
    private final LinkedHashMap<Long, MemoryPage> backingMap;
    private final MemoryPage[] pages;

    private final int[] freeQueue;
    private volatile long reclaimCycles;
    private final AtomicLong clockWriteIdx;
    private final AtomicLong clockReadIdx;

    private final StampedLock lock = new StampedLock();
    private final Thread reclaimThread;

    private volatile boolean running = true;

    public PoolLru(MemoryPage[] pages) {
        backingMap = new LinkedHashMap<>(pages.length, 0.75f);
        this.pages = pages;
        // Pre-assign all entries with nonsense memory locations
        for (int i = 0; i < pages.length; i++) {
            backingMap.put(-i-1L, pages[i]);
        }
        maxSize = backingMap.size();

        freeQueue = new int[pages.length];

        for (int i = 0; i < freeQueue.length; i++) {
            freeQueue[i] = i;
        }

        clockReadIdx = new AtomicLong();
        clockWriteIdx = new AtomicLong(freeQueue.length);

        reclaimThread = Thread.ofPlatform().start(this::reclaimThread);
    }

    public void stop() throws InterruptedException {
        running = false;
        reclaimThread.interrupt();
        reclaimThread.join();
    }

    /** Attempt to get a buffer already associated with the address */
    public MemoryPage get(long address) {
        var res = getAssociatedItem(address);
        if (res != null) {
            res.increaseClock(1);
        }
        return res;
    }

    private MemoryPage getAssociatedItem(long address) {
        long stamp = lock.tryOptimisticRead();
        MemoryPage res = backingMap.get(address);
        if (lock.validate(stamp)) {
            return res;
        }
        stamp = lock.readLock();
        try {
            return backingMap.get(address);
        }
        finally {
            lock.unlockRead(stamp);
        }
    }

    /** Associate the buffer with an address */
    public void register(MemoryPage buffer) {
        long stamp = lock.writeLock();
        try {
            backingMap.put(buffer.pageAddress(), buffer);
            buffer.touchClock(1);
            // Evict the last entry if we've exceeded the maximum size
            while (backingMap.size() >= maxSize) {
                backingMap.pollFirstEntry();
            }
        }
        finally {
            lock.unlockWrite(stamp);
        }
    }

    public void deregister(MemoryPage buffer) {
        long stamp = lock.writeLock();
        try {
            backingMap.remove(buffer.pageAddress(), buffer);
        }
        finally {
            lock.unlockWrite(stamp);
        }
    }

    /** Attempt to get a free buffer from the pool
     *
     * @return An unheld buffer, or null if the attempt failed
     * */
    public MemoryPage getFree() {
        for (;;) {
            var readIdx = clockReadIdx.get();
            var writeIdx = clockWriteIdx.get();

            if (writeIdx - readIdx == freeQueue.length / 4) {
                LockSupport.unpark(reclaimThread);
            } else if (readIdx == writeIdx) {
                LockSupport.unpark(reclaimThread);
                synchronized (this) {
                    try {
                        wait(0, 1000);
                    } catch (InterruptedException e) {
                        throw new RuntimeException(e);
                    }
                }
                continue;
            }

            if (clockReadIdx.compareAndSet(readIdx, readIdx + 1)) {
                return pages[freeQueue[(int) (readIdx % freeQueue.length)]];
            }
        }
    }

    private void reclaimThread() {
        int pageIdx = 0;

        while (running && !Thread.interrupted()) {
            long readIdx = clockReadIdx.get();
            long writeIdx = clockWriteIdx.get();
            int queueSize = (int) (writeIdx - readIdx);
            int targetQueueSize = freeQueue.length / 2;

            if (queueSize >= targetQueueSize) {
                LockSupport.parkNanos(100_000);
                continue;
            }

            int toClaim = targetQueueSize - queueSize;
            if (toClaim == 0)
                continue;

            ++reclaimCycles;
            do {
                if (++pageIdx >= pages.length) {
                    pageIdx = 0;
                }
                var currentPage = pages[pageIdx];

                if (currentPage.decreaseClock()) {
                    if (!currentPage.isHeld()) {
                        freeQueue[(int) (clockWriteIdx.getAndIncrement() % freeQueue.length)] = pageIdx;
                        deregister(pages[pageIdx]);
                        toClaim--;
                    }
                    else {
                        currentPage.touchClock(1);
                    }
                }

            } while (running && toClaim >= 0);

            synchronized (this) {
                notifyAll();
            }
        }
    }

    public int getFreeQueueSize() {
        return (int) (clockWriteIdx.get() - clockReadIdx.get());
    }

    public long getReclaimCycles() {
        return reclaimCycles;
    }
}
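Despite the class name, eviction here is closer to a clock (second-chance) policy than strict LRU: each get() bumps a per-page counter, and the background sweep only recycles a page once that counter has been decremented to zero and the page is unpinned. The core decision, condensed from reclaimThread() above, with freeRing and writeIdx as stand-ins for the real ring-buffer state:

    // Condensed second-chance sweep; illustrative only, not the pool's actual code.
    for (int i = 0; i < pages.length; i++) {
        MemoryPage page = pages[i];
        if (page.decreaseClock()) {          // counter hit zero: eviction candidate
            if (!page.isHeld()) {
                freeRing[(int) (writeIdx++ % freeRing.length)] = i;  // recycle the slot
            } else {
                page.touchClock(1);          // pinned: grant another revolution
            }
        }
    }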
@@ -0,0 +1,163 @@
package nu.marginalia.array.pool;

import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.util.concurrent.atomic.AtomicInteger;

/** Variant of UnsafeMemoryPage that uses the MemorySegment API to access the memory.
 * */
@SuppressWarnings("preview")
public class SegmentMemoryPage implements MemoryPage, AutoCloseable {

    private final MemorySegment segment;
    public final int ord;

    private volatile long pageAddress = -1;
    private volatile boolean dirty = false;

    /** Pin count is used as a read-write condition.
     * <p></p>
     * When the pin count is 0, the page is free.
     * When it is -1, it is held for writing.
     * When it is greater than 0, it is held for reading.
     */
    private final AtomicInteger pinCount = new AtomicInteger(0);
    private final AtomicInteger clock = new AtomicInteger();

    public SegmentMemoryPage(MemorySegment segment, int ord) {
        this.segment = segment;
        this.ord = ord;
    }

    public int hashCode() {
        return (int) segment.address();
    }
    public boolean equals(Object obj) {
        return obj == this;
    }

    @Override
    public void increaseClock(int val) {
        clock.addAndGet(val);
    }
    @Override
    public void touchClock(int val) {
        clock.set(val);
    }
    @Override
    public boolean decreaseClock() {
        for (;;) {
            int cv = clock.get();
            if (cv == 0)
                return true;
            if (clock.compareAndSet(cv, cv-1)) {
                return cv == 1;
            }
        }
    }

    @Override
    public long pageAddress() {
        return pageAddress;
    }

    @Override
    public void pageAddress(long address) {
        this.pageAddress = address;
    }

    @Override
    public AtomicInteger pinCount() {
        return pinCount;
    }

    @Override
    public boolean dirty() {
        return dirty;
    }

    @Override
    public void dirty(boolean val) {
        this.dirty = val;
    }

    @Override
    public boolean isHeld() {
        return 0 != this.pinCount.get();
    }

    @Override
    public byte getByte(int offset) {
        return segment.get(ValueLayout.JAVA_BYTE, offset);
    }
    @Override
    public int getInt(int offset) {
        return segment.get(ValueLayout.JAVA_INT, offset);
    }
    @Override
    public long getLong(int offset) {
        return segment.get(ValueLayout.JAVA_LONG, offset);
    }

    @Override
    public int binarySearchLong(long key, int baseOffset, int fromIndex, int toIndex) {
        int low = 0;
        int len = toIndex - fromIndex;

        while (len > 0) {
            var half = len / 2;
            long val = getLong(baseOffset + 8 * (fromIndex + low + half));
            if (val < key) {
                low += len - half;
            } else if (val == key) {
                low += half;
                break;
            }
            len = half;
        }

        return fromIndex + low;
    }

    @Override
    public boolean acquireForWriting(long intendedAddress) {
        if (pinCount.compareAndSet(0, -1)) {
            pageAddress = intendedAddress;
            dirty = true;
            return true;
        }

        return false;
    }

    @Override
    public boolean acquireAsReader(long expectedAddress) {
        int pinCountVal;

        while ((pinCountVal = pinCount.get()) >= 0) {
            if (pinCount.compareAndSet(pinCountVal, pinCountVal+1)) {
                if (pageAddress != expectedAddress) {
                    pinCount.decrementAndGet();
                    return false;
                }
                return true;
            }
        }

        return false;
    }


    /** Close yields the buffer back to the pool (unless held by multiple readers), but does not deallocate it */
    @Override
    public void close() {
        pinCount.decrementAndGet();
    }

    @Override
    public MemorySegment getMemorySegment() {
        return segment;
    }

}
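The pin count described in the javadoc acts as a miniature reader-writer lock; every transition is a single CAS, so acquisition never blocks. The legal transitions, summarized from the code above and from BufferPool.read:

    //   0 -> -1    acquireForWriting: exclusive hold while the page is filled from disk
    //  -1 ->  1    fill finished, page handed to the caller already pinned (BufferPool.read, acquire=true)
    //  -1 ->  0    fill finished, page parked unpinned in the cache (acquire=false)
    //   n -> n+1   acquireAsReader (n >= 0): optimistic pin, rolled back with a decrement
    //              if the page was meanwhile re-targeted to a different address
    //   n -> n-1   close(): unpin, returning the page to eviction eligibility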
@@ -0,0 +1,167 @@
package nu.marginalia.array.pool;

import nu.marginalia.array.page.UnsafeProvider;
import sun.misc.Unsafe;

import java.lang.foreign.MemorySegment;
import java.util.concurrent.atomic.AtomicInteger;

/** Variant of SegmentMemoryPage that uses Unsafe to access the memory.
 * */
@SuppressWarnings("preview")
public class UnsafeMemoryPage implements MemoryPage, AutoCloseable {

    private static final Unsafe unsafe = UnsafeProvider.getUnsafe();

    private final MemorySegment segment;
    public final int ord;

    private volatile long pageAddress = -1;
    private volatile boolean dirty = false;

    /** Pin count is used as a read-write condition.
     * <p></p>
     * When the pin count is 0, the page is free.
     * When it is -1, it is held for writing.
     * When it is greater than 0, it is held for reading.
     */
    private final AtomicInteger pinCount = new AtomicInteger(0);
    private final AtomicInteger clock = new AtomicInteger();

    public UnsafeMemoryPage(MemorySegment segment, int ord) {
        this.segment = segment;
        this.ord = ord;
    }

    public int hashCode() {
        return (int) segment.address();
    }
    public boolean equals(Object obj) {
        return obj == this;
    }

    public void increaseClock(int val) {
        clock.addAndGet(val);
    }
    public void touchClock(int val) {
        clock.set(val);
    }
    public boolean decreaseClock() {
        for (;;) {
            int cv = clock.get();
            if (cv == 0)
                return true;
            if (clock.compareAndSet(cv, cv-1)) {
                return cv == 1;
            }
        }
    }

    @Override
    public long pageAddress() {
        return pageAddress;
    }

    @Override
    public void pageAddress(long address) {
        this.pageAddress = address;
    }

    @Override
    public AtomicInteger pinCount() {
        return pinCount;
    }

    @Override
    public boolean dirty() {
        return dirty;
    }

    @Override
    public void dirty(boolean val) {
        this.dirty = val;
    }

    @Override
    public boolean isHeld() {
        return 0 != this.pinCount.get();
    }

    public byte getByte(int offset) {
        assert offset >= 0;
        assert offset + 1 <= segment.byteSize();
        return unsafe.getByte(segment.address() + offset);
    }
    public int getInt(int offset) {
        assert offset >= 0;
        assert offset + 4 <= segment.byteSize();
        return unsafe.getInt(segment.address() + offset);
    }
    public long getLong(int offset) {
        assert offset >= 0;
        assert offset + 8 <= segment.byteSize();
        return unsafe.getLong(segment.address() + offset);
    }

    public int binarySearchLong(long key, int baseOffset, int fromIndex, int toIndex) {
        int low = 0;
        int len = toIndex - fromIndex;

        while (len > 0) {
            var half = len / 2;
            long val = getLong(baseOffset + 8 * (fromIndex + low + half));
            if (val < key) {
                low += len - half;
            }
            else if (val == key) {
                low += half;
                break;
            }
            len = half;
        }

        return fromIndex + low;
    }

    @Override
    public boolean acquireForWriting(long intendedAddress) {
        if (pinCount.compareAndSet(0, -1)) {
            pageAddress = intendedAddress;
            dirty = true;
            return true;
        }

        return false;
    }

    @Override
    public boolean acquireAsReader(long expectedAddress) {
        int pinCountVal;

        while ((pinCountVal = pinCount.get()) >= 0) {
            if (pinCount.compareAndSet(pinCountVal, pinCountVal+1)) {
                if (pageAddress != expectedAddress) {
                    pinCount.decrementAndGet();
                    return false;
                }
                return true;
            }
        }

        return false;
    }


    /** Close yields the buffer back to the pool (unless held by multiple readers), but does not deallocate it */
    @Override
    public void close() {
        pinCount.decrementAndGet();
    }

    @Override
    public MemorySegment getMemorySegment() {
        return segment;
    }

}
@@ -1,8 +1,8 @@
 package nu.marginalia.array.page;

-import nu.marginalia.NativeAlgos;
 import nu.marginalia.array.LongArray;
 import nu.marginalia.array.algo.LongArraySort;
+import nu.marginalia.ffi.NativeAlgos;
 import org.openjdk.jmh.annotations.*;

 import java.lang.foreign.Arena;
code/libraries/array/test/nu/marginalia/NativeAlgosTest.java (new file, 72 lines)
@@ -0,0 +1,72 @@
package nu.marginalia;

import nu.marginalia.array.DirectFileReader;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.ffi.LinuxSystemCalls;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.file.Path;

public class NativeAlgosTest {
    @Test
    public void test() throws IOException {
        LongArray array = LongArrayFactory.mmapForWritingShared(Path.of("/tmp/test"), 1024);
        for (int i = 0; i < 1024; i++) {
            array.set(i, i);
        }
        array.close();

        var ms = Arena.global().allocate(512, 8);

        int fd = LinuxSystemCalls.openDirect(Path.of("/tmp/test"));
        int ret = LinuxSystemCalls.readAt(fd, ms, 512);
        System.out.println(ret);
        System.out.println(ms.byteSize());
        LinuxSystemCalls.closeFd(fd);

        var array2 = LongArrayFactory.wrap(ms);
        for (int i = 0; i < array2.size(); i++) {
            System.out.println(i + ": " + array2.get(i));
        }

    }

    @Test
    void testDirectFileReader() throws IOException {
        LongArray array = LongArrayFactory.mmapForWritingShared(Path.of("/tmp/test"), 1024);
        for (int i = 0; i < 1024; i++) {
            array.set(i, i);
        }
        array.close();


        try (var dfr = new DirectFileReader(Path.of("/tmp/test"))) {
            LongArray array2 = LongArrayFactory.onHeapConfined(64);
            dfr.readAligned(array2, 0);
            for (int i = 0; i < array2.size(); i++) {
                System.out.println(i + ": " + array2.get(i));
            }
        }

        var alignedBuffer = Arena.ofAuto().allocate(4096, 4096);
        try (var dfr = new DirectFileReader(Path.of("/tmp/test"))) {
            MemorySegment dest = Arena.ofAuto().allocate(504, 1);
            dfr.readUnaligned(dest, alignedBuffer, 8);

            for (int i = 0; i < dest.byteSize(); i+=8) {
                System.out.println(i + ": " + dest.get(ValueLayout.JAVA_LONG, i));
            }

            dfr.readUnaligned(dest, alignedBuffer, 4000);
            for (int i = 0; i < dest.byteSize(); i+=8) {
                System.out.println(i + ": " + dest.get(ValueLayout.JAVA_LONG, i));
            }
        }
    }

}
@@ -0,0 +1,95 @@
package nu.marginalia.array;

import nu.marginalia.ffi.LinuxSystemCalls;
import nu.marginalia.uring.UringFileReader;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.file.Path;
import java.util.List;

public class NativeAlgosTest {
    @Test
    public void test() throws IOException {
        LongArray array = LongArrayFactory.mmapForWritingShared(Path.of("/tmp/test"), 1024);
        for (int i = 0; i < 1024; i++) {
            array.set(i, i);
        }
        array.close();

        var ms = Arena.global().allocate(512, 8);

        int fd = LinuxSystemCalls.openDirect(Path.of("/tmp/test"));
        int ret = LinuxSystemCalls.readAt(fd, ms, 512);
        System.out.println(ret);
        System.out.println(ms.byteSize());
        LinuxSystemCalls.closeFd(fd);

        var array2 = LongArrayFactory.wrap(ms);
        for (int i = 0; i < array2.size(); i++) {
            System.out.println(i + ": " + array2.get(i));
        }

    }

    @Test
    void testDirectFileReader() throws IOException {
        LongArray array = LongArrayFactory.mmapForWritingShared(Path.of("/tmp/test"), 1024);
        for (int i = 0; i < 1024; i++) {
            array.set(i, i);
        }
        array.close();


        try (var dfr = new DirectFileReader(Path.of("/tmp/test"))) {
            LongArray array2 = LongArrayFactory.onHeapConfined(64);
            dfr.readAligned(array2, 0);
            for (int i = 0; i < array2.size(); i++) {
                System.out.println(i + ": " + array2.get(i));
            }
        }

        var alignedBuffer = Arena.ofAuto().allocate(4096, 4096);
        try (var dfr = new DirectFileReader(Path.of("/tmp/test"))) {
            MemorySegment dest = Arena.ofAuto().allocate(504, 1);
            dfr.readUnaligned(dest, alignedBuffer, 8);

            for (int i = 0; i < dest.byteSize(); i+=8) {
                System.out.println(i + ": " + dest.get(ValueLayout.JAVA_LONG, i));
            }

            dfr.readUnaligned(dest, alignedBuffer, 4000);
            for (int i = 0; i < dest.byteSize(); i+=8) {
                System.out.println(i + ": " + dest.get(ValueLayout.JAVA_LONG, i));
            }
        }
    }

    @Test
    void testAioFileReader() throws IOException {
        LongArray array = LongArrayFactory.mmapForWritingShared(Path.of("/tmp/test"), 1024);
        for (int i = 0; i < 1024; i++) {
            array.set(i, i);
        }
        array.close();

        try (var dfr = new UringFileReader(Path.of("/tmp/test"), false)) {
            MemorySegment buf1 = Arena.ofAuto().allocate(32, 8);
            MemorySegment buf2 = Arena.ofAuto().allocate(16, 8);

            dfr.read(List.of(buf1, buf2), List.of(0L, 8L));

            for (int i = 0; i < buf1.byteSize(); i+=8) {
                System.out.println(buf1.get(ValueLayout.JAVA_LONG, i));
            }

            for (int i = 0; i < buf2.byteSize(); i+=8) {
                System.out.println(buf2.get(ValueLayout.JAVA_LONG, i));
            }
        }

    }
}
@@ -32,9 +32,21 @@ class LongArraySearchTest {
     @Test
     public void testEmptyRange() {
         assertTrue(segmentArray.binarySearchN(2, 0, 0, 0) <= 0);

         assertTrue(segmentArray.binarySearch(0, 0, 0) <= 0);
     }

+    @Test
+    public void testBinarySearchNCase() {
+        try (var array = LongArrayFactory.onHeapConfined(1024)) {
+            for (int i = 0; i < 64; i++) {
+                array.set(2*i, 3*i);
+                array.set(2*i+1, i);
+                System.out.println(i + ":" + array.get(i));
+            }
+            System.out.println(array.binarySearchN(2, 3, 0, 64));
+        }
+    }
+
     void binarySearchTester(LongArray array) {
         for (int i = 0; i < array.size() * 3; i++) {
@@ -0,0 +1,36 @@
package nu.marginalia.array.pool;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;

import static java.lang.foreign.ValueLayout.JAVA_LONG;

class UnsafeMemoryPageTest {

    @Test
    void binarySearchLong() {
        MemorySegment ms = Arena.ofAuto().allocate(8 * 9);
        ms.setAtIndex(JAVA_LONG, 0, 2260);
        ms.setAtIndex(JAVA_LONG, 1, 2513);
        ms.setAtIndex(JAVA_LONG, 2, 3531);
        ms.setAtIndex(JAVA_LONG, 3, 4637);
        ms.setAtIndex(JAVA_LONG, 4, 4975);
        ms.setAtIndex(JAVA_LONG, 5, 6647);
        ms.setAtIndex(JAVA_LONG, 6, 7179);
        ms.setAtIndex(JAVA_LONG, 7, 7509);
        ms.setAtIndex(JAVA_LONG, 8, 8000);
        UnsafeMemoryPage page = new UnsafeMemoryPage(ms, 1);
        Assertions.assertEquals(0, page.binarySearchLong(2260, 0, 0, 9));
        Assertions.assertEquals(1, page.binarySearchLong(2513, 0, 0, 9));
        Assertions.assertEquals(2, page.binarySearchLong(3531, 0, 0, 9));
        Assertions.assertEquals(3, page.binarySearchLong(4637, 0, 0, 9));
        Assertions.assertEquals(4, page.binarySearchLong(4975, 0, 0, 9));
        Assertions.assertEquals(5, page.binarySearchLong(6647, 0, 0, 9));
        Assertions.assertEquals(6, page.binarySearchLong(7179, 0, 0, 9));
        Assertions.assertEquals(7, page.binarySearchLong(7509, 0, 0, 9));
        Assertions.assertEquals(8, page.binarySearchLong(8000, 0, 0, 9));
    }
}
@@ -14,6 +14,8 @@ dependencies {
     implementation project(':code:libraries:array')

     implementation libs.bundles.slf4j
+    implementation libs.fastutil
+    implementation libs.notnull

     testImplementation libs.bundles.slf4j.test
     testImplementation libs.bundles.junit
@@ -187,11 +187,8 @@ public class BTreeReader {

     /** Move the pointer to the next layer in the direction of the provided key */
     public void walkTowardChild(long key) {
-
         final long searchStart = layerOffsets[layer] + pointerOffset;
-
         final long nextLayerOffset = index.binarySearch(key, searchStart, searchStart + ctx.pageSize()) - searchStart;
-
         layer --;
         maxValueInBlock = index.get(searchStart + nextLayerOffset);
         pointerOffset = ctx.pageSize() * (pointerOffset + nextLayerOffset);
@@ -250,21 +247,20 @@ public class BTreeReader {
         long dataIndex = findData(buffer.currentValue());
         if (dataIndex >= 0) {
             buffer.retainAndAdvance();
-
-            if (buffer.hasMore() && buffer.currentValue() <= maxValueInBlock) {
-                long relOffsetInBlock = dataIndex - pointerOffset * ctx.entrySize;
-
-                long remainingTotal = dataBlockEnd - dataIndex;
-                long remainingBlock = ctx.pageSize() - relOffsetInBlock; // >= 0
-
-                long searchEnd = dataIndex + min(remainingTotal, remainingBlock);
-
-                data.retainN(buffer, ctx.entrySize, maxValueInBlock, dataIndex, searchEnd);
-            }
         }
         else {
             buffer.rejectAndAdvance();
         }
+        if (buffer.hasMore() && buffer.currentValue() <= maxValueInBlock) {
+            long relOffsetInBlock = dataIndex - pointerOffset * ctx.entrySize;
+
+            long remainingTotal = dataBlockEnd - dataIndex;
+            long remainingBlock = ctx.pageSize() - relOffsetInBlock; // >= 0
+
+            long searchEnd = dataIndex + min(remainingTotal, remainingBlock);
+
+            data.retainN(buffer, ctx.entrySize, maxValueInBlock, dataIndex, searchEnd);
+        }

     }
@@ -32,7 +32,16 @@ public class BTreeWriter {

         // Calculate the data range
         final long startRange = header.dataOffsetLongs();
-        final long endRange = startRange + (long) numEntries * ctx.entrySize;
+        final long endRange;
+        if (header.layers() == 0) {
+            endRange = offset + ctx.pageSize();
+            assert ctx.pageSize() - 3 >= numEntries * ctx.entrySize;
+        }
+        else {
+            long dataSizeLongs = (long) numEntries * ctx.entrySize;
+            long dataSizeBlockRounded = (long) ctx.pageSize() * ( dataSizeLongs / ctx.pageSize() + Long.signum(dataSizeLongs % ctx.pageSize()));
+            endRange = startRange + dataSizeBlockRounded;
+        }

         // Prepare to write the data
         var slice = map.range(startRange, endRange);
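The Long.signum term above is a compact ceiling division: dataSizeLongs / pageSize counts whole pages, and the signum of the remainder adds one more page whenever the data spills into a partial page. Worked numbers under an assumed 512-long page and 2-long entries:

    long pageSize = 512, entrySize = 2, numEntries = 1000;
    long dataSizeLongs = numEntries * entrySize;                    // 2000
    long rounded = pageSize * (dataSizeLongs / pageSize             // 3 full pages
                 + Long.signum(dataSizeLongs % pageSize));          // + 1 partial page = 2048
    // 2000 longs of entry data are padded out to four 512-long pages.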
@@ -53,7 +62,9 @@ public class BTreeWriter {
         }

         // Return the size of the written data
-        return endRange - offset;
+        long size = endRange - offset;
+        assert (size % ctx.pageSize()) == 0 : "Size is not page size aligned, was " + size + ", page size = " + ctx.pageSize();
+        return size;
     }
@@ -5,7 +5,7 @@ public enum BTreeBlockSize {
     BS_32(2),
     BS_64(3),
     BS_128(4),
-    BS_245(5),
+    BS_256(5),
     BS_512(6),
     BS_1024(7),
     BS_2048(8),
@@ -9,14 +9,6 @@ public class BTreeContext {
     private final int blockSizeBits;
     private final int pageSize;

-    // Below this number of data pages, a b-tree will not be constructed.
-    //
-    // 8 pages is the breaking point where using a B-tree is actually advantageous
-    // over just binary searching in a sorted list. Above 8 pages, binary search will
-    // worst-case four page faults. A b-tree will incur three page faults up until
-    // ~100k-200k entries with typical configurations.
-    private static final int MIN_PAGES_FOR_BTREE = 8;
-
     /**
      * @param maxLayers The maximum number of index layers
      * @param entrySize The entry size, for size 1 the key is the data. For sizes larger than 1,
@@ -38,11 +30,19 @@ public class BTreeContext {
     public long calculateSize(int numEntries) {
         var header = BTreeWriter.makeHeader(this, 0, numEntries);

-        return header.dataOffsetLongs() + (long) numEntries * entrySize + 4;
+        long size;
+        if (header.layers() == 0)
+            size = pageSize;
+        else {
+            long dataSizeLongs = (long) numEntries * entrySize;
+            long dataSizeBlockRounded = pageSize * (dataSizeLongs / pageSize + Long.signum(dataSizeLongs % pageSize));
+            size = header.dataOffsetLongs() + dataSizeBlockRounded;
+        }
+        return size;
     }

     public int numIndexLayers(int numEntries) {
-        if (entrySize * numEntries <= pageSize * MIN_PAGES_FOR_BTREE) {
+        if (entrySize * numEntries <= pageSize - 3) {
             return 0;
         }
         for (int i = 1; i < maxLayers; i++) {
@@ -0,0 +1,52 @@
package nu.marginalia.skiplist;

public class SkipListConstants {
    public static final int BLOCK_SIZE = Integer.getInteger("index.documentsSkipListBlockSize", 65536);
    static final int MIN_TRUNCATED_BLOCK_SIZE = Math.min(512, BLOCK_SIZE / 4);

    static final int HEADER_SIZE = 8;
    static final int SEGREGATED_HEADER_SIZE = 16;
    static final int RECORD_SIZE = 2;
    static final int MAX_RECORDS_PER_BLOCK = (BLOCK_SIZE/8 - 2)/RECORD_SIZE;

    static final byte FLAG_END_BLOCK = 1<<0;


    static int skipOffsetForPointer(int pointerIdx) {
        final int linearPart = 16;
        if (pointerIdx <= linearPart) {
            return pointerIdx + 1;
        }
        return linearPart + ((pointerIdx - linearPart - 1) * (pointerIdx - linearPart - 1));
    }

    static int numPointersForBlock(int blockIdx) {
        return 64;
    }

    static int numPointersForRootBlock(int n) {
        int numBlocks = estimateNumBlocks(n);
        for (int fp = 0;;fp++) {
            if (skipOffsetForPointer(fp) >= numBlocks) {
                return fp;
            }
        }
    }

    static int rootBlockCapacity(int rootBlockSize, int n) {
        return Math.min(n, (rootBlockSize - SEGREGATED_HEADER_SIZE - 8 * numPointersForRootBlock(n)) / (8*RECORD_SIZE));
    }

    static int nonRootBlockCapacity(int blockIdx) {
        assert blockIdx >= 1;
        return (BLOCK_SIZE - SEGREGATED_HEADER_SIZE - 8 * numPointersForBlock(blockIdx)) / (8*RECORD_SIZE);
    }

    static int estimateNumBlocks(int n) {
        return n / MAX_RECORDS_PER_BLOCK + Integer.signum(n % MAX_RECORDS_PER_BLOCK);
    }

    public static int pageDataOffset(int baseBlockOffset, int fc) {
        return baseBlockOffset + 8 * (1 + fc);
    }
}
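skipOffsetForPointer grows linearly for the first pointers and quadratically after that, so a handful of forward pointers per block can reach both near and very distant blocks. The distances it yields, computed directly from the formula above:

    // i = 0..16  ->  i + 1             : 1, 2, 3, ..., 17 blocks ahead
    // i > 16     ->  16 + (i - 17)^2   : 16, 17, 20, 25, 32, 41, ... blocks ahead
    // e.g. skipOffsetForPointer(20) == 16 + 3*3 == 25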
@@ -0,0 +1,545 @@
package nu.marginalia.skiplist;

import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.array.pool.BufferPool;
import nu.marginalia.array.pool.MemoryPage;
import org.jetbrains.annotations.NotNull;

import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.util.ArrayList;
import java.util.List;

public class SkipListReader {

    private final BufferPool pool;
    private final long blockStart;

    private long currentBlock;
    private int currentBlockOffset;
    private int currentBlockIdx;

    private boolean atEnd;

    public SkipListReader(BufferPool pool, long blockStart) {
        this.pool = pool;
        this.blockStart = blockStart;

        currentBlock = blockStart & -SkipListConstants.BLOCK_SIZE;
        currentBlockOffset = (int) (blockStart & (SkipListConstants.BLOCK_SIZE - 1));
        atEnd = false;

        currentBlockIdx = 0;
    }

    public void reset() {
        currentBlock = blockStart & -SkipListConstants.BLOCK_SIZE;
        currentBlockOffset = (int) (blockStart & (SkipListConstants.BLOCK_SIZE - 1));
        currentBlockIdx = 0;

        atEnd = false;
    }

    public boolean atEnd() {
        return atEnd;
    }

    public int estimateSize() {
        try (var page = pool.get(currentBlock)) {
            int fc = headerForwardCount(page, currentBlockOffset);
            if (fc > 0) {
                return SkipListConstants.MAX_RECORDS_PER_BLOCK * SkipListConstants.skipOffsetForPointer(fc);
            }
            else {
                return headerNumRecords(page, currentBlockOffset);
            }
        }
    }

    boolean retainInPage(MemoryPage page, int dataOffset, int n, LongQueryBuffer data) {

        int matches = 0;

        while (data.hasMore()
                && n > (currentBlockIdx = page.binarySearchLong(data.currentValue(), dataOffset, currentBlockIdx, n)))
        {
            if (data.currentValue() != page.getLong( dataOffset + currentBlockIdx * 8)) {
                data.rejectAndAdvance();
            }
            else {
                data.retainAndAdvance();

                if (++matches > 5) {
                    break;
                }
            }
        }

        outer:
        while (data.hasMore()) {
            long bv = data.currentValue();

            for (; currentBlockIdx < n; currentBlockIdx++) {
                long pv = page.getLong( dataOffset + currentBlockIdx * 8);
                if (bv < pv) {
                    data.rejectAndAdvance();
                    continue outer;
                }
                else if (bv == pv) {
                    data.retainAndAdvance();
                    continue outer;
                }
            }
            break;
        }

        return currentBlockIdx >= n;
    }

    public boolean tryRetainData(@NotNull LongQueryBuffer data) {
        try (var page = pool.get(currentBlock)) {

            int n = headerNumRecords(page, currentBlockOffset);
            int fc = headerForwardCount(page, currentBlockOffset);
            int flags = headerFlags(page, currentBlockOffset);

            int dataOffset = SkipListConstants.pageDataOffset(currentBlockOffset, fc);
            if (retainInPage(page, dataOffset, n, data)) {
                atEnd = (flags & SkipListConstants.FLAG_END_BLOCK) != 0;
                if (atEnd) {
                    while (data.hasMore())
                        data.rejectAndAdvance();
                    return false;
                }

                if (!data.hasMore()) {
                    currentBlock += SkipListConstants.BLOCK_SIZE;
                }
                else {
                    long nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE;
                    long currentValue = data.currentValue();
                    for (int i = 0; i < fc; i++) {
                        long blockMaxValue = page.getLong(currentBlockOffset + SkipListConstants.HEADER_SIZE + 8 * i);
                        nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE * SkipListConstants.skipOffsetForPointer(Math.max(0, i-1));
                        if (blockMaxValue >= currentValue) {
                            break;
                        }
                    }
                    currentBlockOffset = 0;
                    currentBlockIdx = 0;
                    currentBlock = nextBlock;
                }
            }
        }

        return data.hasMore();
    }


    public void retainData(@NotNull LongQueryBuffer data) {
        while (data.hasMore()) {
            try (var page = pool.get(currentBlock)) {

                int n = headerNumRecords(page, currentBlockOffset);
                int fc = headerForwardCount(page, currentBlockOffset);
                int flags = headerFlags(page, currentBlockOffset);

                int dataOffset = SkipListConstants.pageDataOffset(currentBlockOffset, fc);
                if (retainInPage(page, dataOffset, n, data)) {
                    atEnd = (flags & SkipListConstants.FLAG_END_BLOCK) != 0;
                    if (atEnd) {
                        while (data.hasMore())
                            data.rejectAndAdvance();
                        return;
                    }

                    if (!data.hasMore()) {
                        currentBlock += SkipListConstants.BLOCK_SIZE;
                    }
                    else {
                        long nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE;
                        long currentValue = data.currentValue();
                        for (int i = 0; i < fc; i++) {
                            long blockMaxValue = page.getLong(currentBlockOffset + SkipListConstants.HEADER_SIZE + 8 * i);
                            nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE * SkipListConstants.skipOffsetForPointer(Math.max(0, i-1));
                            if (blockMaxValue >= currentValue) {
                                break;
                            }
                        }
                        currentBlockOffset = 0;
                        currentBlockIdx = 0;
                        currentBlock = nextBlock;
                    }
                }
            }
        }
    }

    public long[] getValueOffsets(long[] keys) {
        int pos = 0;
        long[] vals = new long[keys.length];

        while (pos < keys.length) {
            try (var page = pool.get(currentBlock)) {
                MemorySegment ms = page.getMemorySegment();
                assert ms.get(ValueLayout.JAVA_INT, currentBlockOffset) != 0 : "Likely reading zero space @ " + currentBlockOffset + " starting at " + blockStart + " -- " + parseBlock(ms, currentBlockOffset);
                int n = headerNumRecords(page, currentBlockOffset);
                int fc = headerForwardCount(page, currentBlockOffset);
                byte flags = (byte) headerFlags(page, currentBlockOffset);

                if (n == 0) {
                    throw new IllegalStateException("Reading null memory!");
                }

                int dataOffset = SkipListConstants.pageDataOffset(currentBlockOffset, fc);
                int valuesOffset = dataOffset + 8 * n;
                if ((valuesOffset & 7) != 0) {
                    throw new IllegalStateException(parseBlock(ms, currentBlockOffset).toString());
                }

                int matches = 0;

                while (pos < keys.length
                        && n > (currentBlockIdx = page.binarySearchLong(keys[pos], dataOffset, currentBlockIdx, n)))
                {
                    if (keys[pos] != page.getLong( dataOffset + currentBlockIdx * 8)) {
                        pos++;
                    }
                    else {
                        vals[pos++] = page.getLong(valuesOffset + currentBlockIdx * 8);

                        if (++matches > 5) {
                            break;
                        }
                    }
                }

                outer:
                while (pos < keys.length) {
                    long kv = keys[pos];

                    for (; currentBlockIdx < n; currentBlockIdx++) {
                        long pv = page.getLong( dataOffset + currentBlockIdx * 8);
                        if (kv < pv) {
                            pos++;
                            continue outer;
                        }
                        else if (kv == pv) {
                            vals[pos++] = page.getLong(valuesOffset + currentBlockIdx * 8);
                            continue outer;
                        }
                    }
                    break;
                }

                if (currentBlockIdx >= n) {
                    atEnd = (flags & SkipListConstants.FLAG_END_BLOCK) != 0;
                    if (atEnd) {
                        break;
                    }

                    if (pos >= keys.length) {
                        currentBlock += SkipListConstants.BLOCK_SIZE;
                    }
                    else {
                        long nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE;
                        long currentValue = keys[pos];
                        for (int i = 0; i < fc; i++) {
                            long blockMaxValue = page.getLong(currentBlockOffset + SkipListConstants.HEADER_SIZE + 8 * i);
                            nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE * SkipListConstants.skipOffsetForPointer(Math.max(0, i-1));
                            if (blockMaxValue >= currentValue) {
                                break;
                            }
                        }
                        currentBlockOffset = 0;
                        currentBlockIdx = 0;
                        currentBlock = nextBlock;
                    }
                }
            }
        }

        return vals;
    }

    boolean rejectInPage(MemoryPage page, int dataOffset, int n, LongQueryBuffer data) {

        int matches = 0;

        while (data.hasMore()
                && n > (currentBlockIdx = page.binarySearchLong(data.currentValue(), dataOffset, currentBlockIdx, n)))
        {
            if (data.currentValue() != page.getLong( dataOffset + currentBlockIdx * 8)) {
                data.retainAndAdvance();
            }
            else {
                data.rejectAndAdvance();

                if (++matches > 5) {
                    break;
                }
            }
        }

        outer:
        while (data.hasMore()) {
            long bv = data.currentValue();

            for (; currentBlockIdx < n; currentBlockIdx++) {
                long pv = page.getLong( dataOffset + currentBlockIdx * 8);
                if (bv < pv) {
                    data.retainAndAdvance();
                    continue outer;
                }
                else if (bv == pv) {
                    data.rejectAndAdvance();
                    continue outer;
                }
            }
            break;
        }

        return currentBlockIdx >= n;
    }

    public boolean tryRejectData(@NotNull LongQueryBuffer data) {
        try (var page = pool.get(currentBlock)) {

            int n = headerNumRecords(page, currentBlockOffset);
            int fc = headerForwardCount(page, currentBlockOffset);
            int flags = headerFlags(page, currentBlockOffset);

            int dataOffset = SkipListConstants.pageDataOffset(currentBlockOffset, fc);
            if (rejectInPage(page, dataOffset, n, data)) {
||||
atEnd = (flags & SkipListConstants.FLAG_END_BLOCK) != 0;
|
||||
if (atEnd) {
|
||||
while (data.hasMore())
|
||||
data.retainAndAdvance();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!data.hasMore()) {
|
||||
currentBlock += SkipListConstants.BLOCK_SIZE;
|
||||
}
|
||||
else {
|
||||
long nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE;
|
||||
long currentValue = data.currentValue();
|
||||
for (int i = 0; i < fc; i++) {
|
||||
long blockMaxValue = page.getLong(currentBlockOffset + SkipListConstants.HEADER_SIZE + 8 * i);
|
||||
nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE * SkipListConstants.skipOffsetForPointer(Math.max(0, i-1));
|
||||
if (blockMaxValue >= currentValue) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
currentBlockOffset = 0;
|
||||
currentBlockIdx = 0;
|
||||
currentBlock = nextBlock;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return data.hasMore();
|
||||
}
|
||||
|
||||
public void rejectData(@NotNull LongQueryBuffer data) {
|
||||
while (data.hasMore()) {
|
||||
try (var page = pool.get(currentBlock)) {
|
||||
MemorySegment ms = page.getMemorySegment();
|
||||
|
||||
int n = headerNumRecords(page, currentBlockOffset);
|
||||
int fc = headerForwardCount(page, currentBlockOffset);
|
||||
byte flags = (byte) headerFlags(page, currentBlockOffset);
|
||||
|
||||
int dataOffset = SkipListConstants.pageDataOffset(currentBlockOffset, fc);
|
||||
|
||||
if (rejectInPage(page, dataOffset, n, data)) {
|
||||
atEnd = (flags & SkipListConstants.FLAG_END_BLOCK) != 0;
|
||||
if (atEnd) {
|
||||
while (data.hasMore())
|
||||
data.retainAndAdvance();
|
||||
break;
|
||||
}
|
||||
if (!data.hasMore()) {
|
||||
currentBlock += SkipListConstants.BLOCK_SIZE;
|
||||
}
|
||||
else {
|
||||
long nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE;
|
||||
long currentValue = data.currentValue();
|
||||
for (int i = 0; i < fc; i++) {
|
||||
long blockMaxValue = page.getLong(currentBlockOffset + SkipListConstants.HEADER_SIZE + 8 * i);
|
||||
nextBlock = currentBlock + (long) SkipListConstants.BLOCK_SIZE * SkipListConstants.skipOffsetForPointer(Math.max(0, i-1));
|
||||
if (blockMaxValue >= currentValue) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
currentBlockOffset = 0;
|
||||
currentBlockIdx = 0;
|
||||
currentBlock = nextBlock;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public int getData(@NotNull LongQueryBuffer dest)
|
||||
{
|
||||
if (atEnd) return 0;
|
||||
|
||||
int totalCopied = 0;
|
||||
while (dest.fitsMore() && !atEnd) {
|
||||
try (var page = pool.get(currentBlock)) {
|
||||
MemorySegment ms = page.getMemorySegment();
|
||||
|
||||
assert ms.get(ValueLayout.JAVA_INT, currentBlockOffset) != 0 : "Likely reading zero space";
|
||||
int n = headerNumRecords(page, currentBlockOffset);
|
||||
int fc = headerForwardCount(page, currentBlockOffset);
|
||||
|
||||
if (n == 0) {
|
||||
throw new IllegalStateException("Reading null memory!");
|
||||
}
|
||||
|
||||
assert fc >= 0;
|
||||
byte flags = (byte) headerFlags(page, currentBlockOffset);
|
||||
|
||||
int dataOffset = SkipListConstants.pageDataOffset(currentBlockOffset, fc);
|
||||
|
||||
int nCopied = dest.addData(ms, dataOffset, n - currentBlockIdx);
|
||||
currentBlockIdx += nCopied;
|
||||
|
||||
if (currentBlockIdx >= n) {
|
||||
atEnd = (flags & SkipListConstants.FLAG_END_BLOCK) != 0;
|
||||
if (!atEnd) {
|
||||
currentBlock += SkipListConstants.BLOCK_SIZE;
|
||||
currentBlockOffset = 0;
|
||||
currentBlockIdx = 0;
|
||||
}
|
||||
}
|
||||
|
||||
totalCopied += nCopied;
|
||||
}
|
||||
}
|
||||
|
||||
return totalCopied;
|
||||
}
|
||||
|
||||
|
||||
public record RecordView(int n,
|
||||
int fc,
|
||||
int flags,
|
||||
LongList fowardPointers,
|
||||
LongList docIds)
|
||||
{
|
||||
public long highestDocId() {
|
||||
return docIds.getLast();
|
||||
}
|
||||
}
|
||||
|
||||
public static RecordView parseBlock(MemorySegment seg, int offset) {
|
||||
int n = headerNumRecords(seg, offset);
|
||||
int fc = headerForwardCount(seg, offset);
|
||||
int flags = headerFlags(seg, offset);
|
||||
|
||||
assert n <= SkipListConstants.MAX_RECORDS_PER_BLOCK : "Invalid header, n = " + n;
|
||||
|
||||
offset += SkipListConstants.HEADER_SIZE;
|
||||
|
||||
LongList forwardPointers = new LongArrayList();
|
||||
for (int i = 0; i < fc; i++) {
|
||||
forwardPointers.add(seg.get(ValueLayout.JAVA_LONG, offset + 8L*i));
|
||||
}
|
||||
offset += 8*fc;
|
||||
|
||||
LongList docIds = new LongArrayList();
|
||||
|
||||
long currentBlock = offset & -SkipListConstants.BLOCK_SIZE;
|
||||
long lastDataBlock = (offset + 8L * (n-1)) & - SkipListConstants.BLOCK_SIZE;
|
||||
|
||||
if (currentBlock != lastDataBlock) {
|
||||
throw new IllegalStateException("Last data block is not the same as the current data block (n=" + n +", flags=" + flags + ")" + " for block offset " + (offset & (SkipListConstants.BLOCK_SIZE - 1)));
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
docIds.add(seg.get(ValueLayout.JAVA_LONG, offset + 8L * i));
|
||||
}
|
||||
|
||||
for (int i = 1; i < docIds.size(); i++) {
|
||||
if (docIds.getLong(i-1) >= docIds.getLong(i)) {
|
||||
throw new IllegalStateException("docIds are not increasing" + new RecordView(n, fc, flags, forwardPointers, docIds));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return new RecordView(n, fc, flags, forwardPointers, docIds);
|
||||
}
|
||||
|
||||
public static List<RecordView> parseBlocks(MemorySegment seg, int offset) {
|
||||
List<RecordView> ret = new ArrayList<>();
|
||||
RecordView block;
|
||||
do {
|
||||
block = parseBlock(seg, offset);
|
||||
ret.add(block);
|
||||
offset = (offset + SkipListConstants.BLOCK_SIZE) & -SkipListConstants.BLOCK_SIZE;
|
||||
} while (0 == (block.flags & SkipListConstants.FLAG_END_BLOCK));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static List<RecordView> parseBlocks(BufferPool pool, long offset) {
|
||||
List<RecordView> ret = new ArrayList<>();
|
||||
RecordView block;
|
||||
do {
|
||||
try (var page = pool.get(offset & -SkipListConstants.BLOCK_SIZE)) {
|
||||
block = parseBlock(page.getMemorySegment(), (int) (offset & (SkipListConstants.BLOCK_SIZE - 1)));
|
||||
ret.add(block);
|
||||
offset = (offset + SkipListConstants.BLOCK_SIZE) & -SkipListConstants.BLOCK_SIZE;
|
||||
}
|
||||
|
||||
} while (0 == (block.flags & SkipListConstants.FLAG_END_BLOCK));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static int headerNumRecords(MemoryPage buffer, int offset) {
|
||||
return buffer.getInt(offset);
|
||||
}
|
||||
|
||||
public static int headerNumRecords(MemorySegment block, int offset) {
|
||||
return block.get(ValueLayout.JAVA_INT, offset);
|
||||
}
|
||||
|
||||
public static int headerForwardCount(MemoryPage buffer, int offset) {
|
||||
return buffer.getByte(offset + 4);
|
||||
}
|
||||
|
||||
public static int headerForwardCount(MemorySegment block, int offset) {
|
||||
return block.get(ValueLayout.JAVA_BYTE, offset + 4);
|
||||
}
|
||||
|
||||
private long headerValuesBaseOffset(MemoryPage buffer, int blockOffset) {
|
||||
return buffer.getLong(blockOffset + 8 * (1+headerForwardCount(buffer, blockOffset)));
|
||||
}
|
||||
|
||||
public static int headerFlags(MemoryPage buffer, int offset) {
|
||||
return buffer.getByte(offset + 5);
|
||||
}
|
||||
|
||||
public static int headerFlags(MemorySegment block, int offset) {
|
||||
return block.get(ValueLayout.JAVA_BYTE, offset + 5);
|
||||
}
|
||||
|
||||
public static int docIdsOffset(MemorySegment block, int offset) {
|
||||
return offset + SkipListConstants.HEADER_SIZE + 8 * headerForwardCount(block, offset);
|
||||
}
|
||||
|
||||
public static int valuesOffset(MemorySegment block, int offset) {
|
||||
return offset + SkipListConstants.HEADER_SIZE + 8 * (headerForwardCount(block, offset) + headerNumRecords(block, offset));
|
||||
}
|
||||
|
||||
}
|
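The same block-advance idiom recurs in tryRetainData, tryRejectData and getValueOffsets above: scan the current block's forward pointers until one's recorded block-max reaches the sought value, then jump by the previous pointer's skip distance. A minimal standalone sketch of that decision follows, with the skip distances modelled as powers of two; that spacing, and the block size, are assumptions here, since the real values live in SkipListConstants:

class SkipAdvanceSketch {
    static final int BLOCK_SIZE = 4096; // assumed; the real constant lives in SkipListConstants

    // Assumed geometric spacing: pointer i covers 2^i blocks ahead
    static int skipOffsetForPointer(int i) {
        return 1 << i;
    }

    // Given the block-max value recorded for each forward pointer of the current
    // block, pick the next block to visit when searching for searchValue
    static long nextBlock(long currentBlock, long[] blockMaxValues, long searchValue) {
        long next = currentBlock + BLOCK_SIZE; // default: the adjacent block
        for (int i = 0; i < blockMaxValues.length; i++) {
            // Step back to the previous pointer's distance, mirroring Math.max(0, i - 1)
            // in the reader: pointer i already overshoots once its max >= searchValue
            next = currentBlock + (long) BLOCK_SIZE * skipOffsetForPointer(Math.max(0, i - 1));
            if (blockMaxValues[i] >= searchValue) {
                break;
            }
        }
        return next;
    }

    public static void main(String[] args) {
        // Pointers cover blocks 1, 2, 4 and 8 ahead with maxima 10, 25, 90, 400;
        // searching for 100 jumps 4 blocks ahead and prints 16384
        System.out.println(nextBlock(0, new long[] {10, 25, 90, 400}, 100));
    }
}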
@@ -0,0 +1,279 @@
package nu.marginalia.skiplist;

import it.unimi.dsi.fastutil.longs.LongArrayList;
import nu.marginalia.array.LongArray;

import java.io.IOException;
import java.lang.foreign.ValueLayout;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

import static nu.marginalia.skiplist.SkipListConstants.*;

public class SkipListWriter implements AutoCloseable {
    private final FileChannel documentsChannel;

    private final ByteBuffer docsBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE).order(ByteOrder.nativeOrder());
    private final LongArrayList maxValuesList = new LongArrayList();

    public SkipListWriter(Path documentsFileName) throws IOException {
        this.documentsChannel = (FileChannel) Files.newByteChannel(documentsFileName, StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
    }

    @Override
    public void close() throws IOException {
        int blockRemaining = (int) (BLOCK_SIZE - (documentsChannel.position() & (BLOCK_SIZE - 1)));
        docsBuffer.position(0);
        docsBuffer.limit(blockRemaining);
        while (docsBuffer.hasRemaining()) {
            documentsChannel.write(docsBuffer);
        }

        documentsChannel.force(false);
        if ((documentsChannel.position() & (BLOCK_SIZE - 1)) != 0) {
            throw new IllegalStateException("Wrote a documents file that was not aligned with block size " + BLOCK_SIZE);
        }
        documentsChannel.close();
    }

    public long documentsPosition() throws IOException {
        return documentsChannel.position();
    }

    public void padDocuments(int nBytes) throws IOException {
        ByteBuffer buffer = ByteBuffer.allocateDirect(nBytes);
        buffer.order(ByteOrder.nativeOrder());
        while (buffer.hasRemaining()) {
            buffer.put((byte) 0);
        }
        buffer.flip();
        while (buffer.hasRemaining()) {
            documentsChannel.write(buffer);
        }
    }

    private void writeCompactBlockHeader(ByteBuffer buffer, int nItems, byte fc, byte flags) {
        assert nItems >= 0;
        assert nItems <= MAX_RECORDS_PER_BLOCK;
        assert fc >= 0;

        buffer.putInt(nItems);      // number of records
        buffer.put(fc);             // forward pointer count
        buffer.put(flags);          // flags
        buffer.putShort((short) 0); // padding to an 8 byte header

        assert (buffer.position() % 8) == 0;
    }

    public long writeList(LongArray input, long offset, int n) throws IOException {
        long startPos = documentsChannel.position();
        assert (startPos % 8) == 0 : "Not long aligned?! " + startPos;
        assert input.isSortedN(2, offset, offset + 2L*n) : "Not sorted @ " + input.hashCode();
        maxValuesList.clear();

        int blockRemaining = (int) (BLOCK_SIZE - (startPos % BLOCK_SIZE));

        if (blockRemaining >= (HEADER_SIZE + RECORD_SIZE * n * ValueLayout.JAVA_LONG.byteSize())) {
            /* THE ENTIRE DATA FITS IN THE CURRENT BLOCK */

            docsBuffer.clear();

            writeCompactBlockHeader(docsBuffer, n, (byte) 0, FLAG_END_BLOCK);

            // Write the keys
            for (int i = 0; i < n; i++) {
                docsBuffer.putLong(input.get(offset + 2L * i));
            }

            // Write the values
            for (int i = 0; i < n; i++) {
                docsBuffer.putLong(input.get(offset + 2L * i + 1));
            }

            docsBuffer.flip();
            while (docsBuffer.hasRemaining()) {
                documentsChannel.write(docsBuffer);
            }

            return startPos;
        }

        if (blockRemaining < SkipListConstants.MIN_TRUNCATED_BLOCK_SIZE) {

            /* REMAINING BLOCK TOO SMALL TO RECLAIM - INSERT PADDING */
            docsBuffer.clear();
            for (int i = 0; i < blockRemaining; i++) {
                docsBuffer.put((byte) 0);
            }
            docsBuffer.flip();
            while (docsBuffer.hasRemaining()) {
                startPos += documentsChannel.write(docsBuffer);
            }
            blockRemaining = BLOCK_SIZE;
        }

        int writtenRecords = 0;
        int numBlocks = calculateActualNumBlocks(blockRemaining, n);

        {
            int rootBlockCapacity = rootBlockCapacity(blockRemaining, n);
            int rootBlockPointerCount = numPointersForRootBlock(n);

            /* WRITE THE ROOT BLOCK */

            docsBuffer.clear();
            byte flags = 0;
            if (numBlocks == 1) {
                flags = FLAG_END_BLOCK;
            }

            writeCompactBlockHeader(docsBuffer, rootBlockCapacity, (byte) rootBlockPointerCount, flags);

            findBlockHighestValues(input, maxValuesList,
                    offset + (long) RECORD_SIZE * rootBlockCapacity,
                    numBlocks,
                    n - rootBlockCapacity);

            // Write skip pointers
            for (int pi = 0; pi < rootBlockPointerCount; pi++) {
                int skipBlocks = skipOffsetForPointer(pi);

                assert skipBlocks < 1 + numBlocks; // should be ~ 1/2 numBlocks at most for the root block

                docsBuffer.putLong(maxValuesList.getLong(skipBlocks));
            }

            // Write the keys
            for (int i = 0; i < rootBlockCapacity; i++) {
                docsBuffer.putLong(input.get(offset + 2L * i));
            }

            // Write the values
            for (int i = 0; i < rootBlockCapacity; i++) {
                docsBuffer.putLong(input.get(offset + 2L * i + 1));
            }

            // Move offset to next block's data
            offset += 2L * rootBlockCapacity;
            writtenRecords += rootBlockCapacity;

            // Align block with page size
            if (numBlocks > 1) {
                while (docsBuffer.position() < blockRemaining) {
                    docsBuffer.putLong(0L);
                }
            }

            docsBuffer.flip();
            while (docsBuffer.hasRemaining()) {
                documentsChannel.write(docsBuffer);
            }
        }

        /* WRITE REMAINING BLOCKS */

        for (int blockIdx = 1; blockIdx < numBlocks; blockIdx++) {
            int nRemaining = n - writtenRecords;
            int blockCapacity = nonRootBlockCapacity(blockIdx);

            int maxPointers = numPointersForBlock(blockIdx);
            int forwardPointers;
            for (forwardPointers = 0; forwardPointers < maxPointers; forwardPointers++) {
                if (blockIdx + skipOffsetForPointer(forwardPointers) + 1 >= maxValuesList.size())
                    break;
            }

            boolean isLastBlock = blockIdx == (numBlocks - 1);
            int blockSize = Math.min(nRemaining, blockCapacity);
            docsBuffer.clear();

            byte flags = 0;
            if (isLastBlock) {
                flags = FLAG_END_BLOCK;
            }
            writeCompactBlockHeader(docsBuffer, blockSize, (byte) forwardPointers, flags);

            for (int pi = 0; pi < forwardPointers; pi++) {
                docsBuffer.putLong(maxValuesList.getLong(blockIdx + skipOffsetForPointer(pi)));
            }

            // Write the keys
            for (int i = 0; i < blockSize; i++) {
                long docId = input.get(offset + 2L * i);
                docsBuffer.putLong(docId);
            }

            // Write the values
            for (int i = 0; i < blockSize; i++) {
                long val = input.get(offset + 2L * i + 1);
                docsBuffer.putLong(val);
            }

            // Move offset to next block's data
            offset += 2L * Math.min(nRemaining, blockCapacity);
            writtenRecords += Math.min(nRemaining, blockCapacity);

            // Align block with page size everywhere but the last
            if (!isLastBlock) {
                while (docsBuffer.position() < docsBuffer.capacity()) {
                    docsBuffer.putLong(0L);
                }
            }

            docsBuffer.flip();
            while (docsBuffer.hasRemaining()) {
                documentsChannel.write(docsBuffer);
            }
        }

        return startPos;
    }

    private void findBlockHighestValues(LongArray input,
                                        LongArrayList output,
                                        long offsetStart,
                                        int numBlocks,
                                        int n)
    {
        output.clear();

        output.add(-1); // Add a dummy value for the root block

        for (int i = 1; i < numBlocks; i++) {
            assert n >= 0;

            int blockCapacity = nonRootBlockCapacity(i);
            long offsetEnd = offsetStart + 2L*Math.min(n, blockCapacity) - 2L;
            offsetStart += 2L*Math.min(n, blockCapacity);

            n -= blockCapacity;
            output.add(input.get(offsetEnd));
        }
    }

    static int calculateActualNumBlocks(int rootBlockSize, int n) {
        assert n >= 1;

        int blocks = 1; // We always generate a root block
        n -= rootBlockCapacity(rootBlockSize, n);

        for (int i = 1; n > 0; i++) {
            n -= nonRootBlockCapacity(i);
            blocks++;
        }

        return blocks;
    }

}
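Read together with the reader above, the writer admits a compact round trip. The sketch below mirrors the tests further down (SkipListWriter, BufferPool, SkipListReader and LongArrayFactory.wrap are all exercised there); the expected output assumes getValueOffsets leaves absent keys at zero, as the reader code suggests:

import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.array.pool.BufferPool;
import nu.marginalia.skiplist.SkipListConstants;
import nu.marginalia.skiplist.SkipListReader;
import nu.marginalia.skiplist.SkipListWriter;

import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;

class SkipListRoundTrip {
    public static void main(String[] args) throws Exception {
        Path docsFile = Files.createTempFile("skiplist-example", ".docs.dat");

        long[] keys = { 2, 4, 6, 8 };
        long[] vals = { 20, 40, 60, 80 };

        long listStart;
        try (var writer = new SkipListWriter(docsFile);
             Arena arena = Arena.ofConfined()) {
            // Records are interleaved (key, value) pairs of longs
            MemorySegment ms = arena.allocate(16L * keys.length);
            for (int i = 0; i < keys.length; i++) {
                ms.setAtIndex(ValueLayout.JAVA_LONG, 2L * i, keys[i]);
                ms.setAtIndex(ValueLayout.JAVA_LONG, 2L * i + 1, vals[i]);
            }
            listStart = writer.writeList(LongArrayFactory.wrap(ms), 0, keys.length);
        }

        try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
            var reader = new SkipListReader(pool, listStart);
            // Prints [40, 0, 80]: key 5 is absent, so its slot stays zero
            System.out.println(Arrays.toString(reader.getValueOffsets(new long[] { 4, 5, 8 })));
        }

        Files.deleteIfExists(docsFile);
    }
}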
@@ -1,40 +0,0 @@
package nu.marginalia.btree;

import nu.marginalia.array.LongArray;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;

public class BTreeReaderQueryDataWithIndexTest {
    BTreeContext ctx = new BTreeContext(5, 2, BTreeBlockSize.BS_64);
    LongArray array;

    @BeforeEach
    public void setUp() throws IOException {
        array = LongArray.allocate(65536);
        new BTreeWriter(array, ctx).write(0, 1000, slice -> {
            for (int idx = 0; idx < 1000; idx++) {
                slice.set(idx * 2, 2 * idx);
                slice.set(idx * 2 + 1, 5 * idx);
            }
        });

        // we expect index[key] = 5 * key / 2;
    }

    @Test
    public void testQueryData() {
        long[] keys = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        BTreeReader reader = new BTreeReader(array, ctx, 0);

        long[] data = reader.queryData(keys, 1);

        assertArrayEquals(new long[] { 0, 5, 0, 10, 0, 15, 0, 20, 0, 25 }, data);
    }

}
@@ -1,40 +0,0 @@
package nu.marginalia.btree;

import nu.marginalia.array.LongArray;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;

public class BTreeReaderQueryDataWithoutIndexTest {
    BTreeContext ctx = new BTreeContext(5, 2, BTreeBlockSize.BS_2048);
    LongArray array;

    @BeforeEach
    public void setUp() throws IOException {
        array = LongArray.allocate(65536);
        new BTreeWriter(array, ctx).write(0, 1000, slice -> {
            for (int idx = 0; idx < 1000; idx++) {
                slice.set(idx * 2, 2 * idx);
                slice.set(idx * 2 + 1, 5 * idx);
            }
        });

        // we expect index[key] = 5 * key / 2;
    }

    @Test
    public void testQueryData() {
        long[] keys = new long[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
        BTreeReader reader = new BTreeReader(array, ctx, 0);

        long[] data = reader.queryData(keys, 1);

        assertArrayEquals(new long[] { 0, 5, 0, 10, 0, 15, 0, 20, 0, 25 }, data);
    }

}
@@ -1,59 +0,0 @@
package nu.marginalia.btree;

import nu.marginalia.array.LongArray;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;

public class BTreeReaderRejectRetainWithIndexTest {
    BTreeContext ctx = new BTreeContext(5, 1, BTreeBlockSize.BS_32);
    LongArray array;

    @BeforeEach
    public void setUp() throws IOException {
        array = LongArray.allocate(65536);
        new BTreeWriter(array, ctx).write(0, 1000, slice -> {
            int p = 2;
            for (int idx = 0; idx < 1000; idx++) {
                slice.set(idx, p);
                p = (int) NextPrimeUtil.nextPrime(p + 1, 1);
            }
        });
    }

    @Test
    public void testRetain() {
        LongQueryBuffer odds = new LongQueryBuffer(50);
        for (int i = 0; i < 50; i++)
            odds.data.set(i, 2L*i + 1);

        BTreeReader reader = new BTreeReader(array, ctx, 0);
        reader.retainEntries(odds);
        odds.finalizeFiltering();

        long[] primeOdds = odds.copyData();
        long[] first100OddPrimes = new long[] { 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97 };
        assertArrayEquals(first100OddPrimes, primeOdds);
    }

    @Test
    public void testReject() {
        LongQueryBuffer odds = new LongQueryBuffer(50);
        for (int i = 0; i < 50; i++)
            odds.data.set(i, 2L*i + 1);

        BTreeReader reader = new BTreeReader(array, ctx, 0);
        reader.rejectEntries(odds);
        odds.finalizeFiltering();

        long[] nonPrimeOdds = odds.copyData();
        long[] first100OddNonPrimes = new long[] { 1, 9, 15, 21, 25, 27, 33, 35, 39, 45, 49, 51, 55, 57, 63, 65, 69, 75, 77, 81, 85, 87, 91, 93, 95, 99 };
        assertArrayEquals(first100OddNonPrimes, nonPrimeOdds);
    }
}
@@ -1,60 +0,0 @@
package nu.marginalia.btree;

import nu.marginalia.array.LongArray;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;

public class BTreeReaderRejectRetainWithoutIndexTest {
    BTreeContext ctx = new BTreeContext(5, 1, BTreeBlockSize.BS_2048);
    LongArray array;

    @BeforeEach
    public void setUp() throws IOException {
        array = LongArray.allocate(65536);
        new BTreeWriter(array, ctx).write(0, 1000, slice -> {
            int p = 2;
            for (int idx = 0; idx < 1000; idx++) {
                slice.set(idx, p);
                p = (int) NextPrimeUtil.nextPrime(p + 1, 1);
            }
        });
    }

    @Test
    public void testRetain() {
        LongQueryBuffer odds = new LongQueryBuffer(50);
        for (int i = 0; i < 50; i++)
            odds.data.set(i, 2L*i + 1);

        BTreeReader reader = new BTreeReader(array, ctx, 0);
        reader.retainEntries(odds);
        odds.finalizeFiltering();

        long[] primeOdds = odds.copyData();
        long[] first100OddPrimes = new long[] { 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97 };
        assertArrayEquals(first100OddPrimes, primeOdds);
    }

    @Test
    public void testReject() {
        LongQueryBuffer odds = new LongQueryBuffer(50);
        for (int i = 0; i < 50; i++)
            odds.data.set(i, 2L*i + 1);

        BTreeReader reader = new BTreeReader(array, ctx, 0);
        reader.rejectEntries(odds);
        odds.finalizeFiltering();

        long[] nonPrimeOdds = odds.copyData();
        long[] first100OddNonPrimes = new long[] { 1, 9, 15, 21, 25, 27, 33, 35, 39, 45, 49, 51, 55, 57, 63, 65, 69, 75, 77, 81, 85, 87, 91, 93, 95, 99 };
        assertArrayEquals(first100OddNonPrimes, nonPrimeOdds);
    }
}
@@ -22,18 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue;

class BTreeWriterTest {

    @Test
    void testSmallDataBlock() {
        BTreeContext ctx = new BTreeContext(4, 2, BTreeBlockSize.BS_64);
        BTreeWriter writer = new BTreeWriter(null, ctx);

        var header = writer.makeHeader(ctx, 1024, ctx.pageSize()/2);
        assertEquals(1024 + BTreeHeader.BTreeHeaderSizeLongs, header.dataOffsetLongs());
        assertEquals(header.dataOffsetLongs(), header.indexOffsetLongs());
    }

    @Test
    void testLayerCount() {
        BTreeContext ctx = new BTreeContext(4, 2, BTreeBlockSize.BS_64);

@@ -0,0 +1,497 @@
package nu.marginalia.skiplist;

import it.unimi.dsi.fastutil.longs.LongAVLTreeSet;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongSortedSet;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.array.pool.BufferPool;
import org.junit.jupiter.api.*;

import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.stream.LongStream;

public class SkipListReaderTest {
    Path docsFile;

    @BeforeEach
    void setUp() throws IOException {
        docsFile = Files.createTempFile(SkipListWriterTest.class.getSimpleName(), ".docs.dat");
    }

    @AfterEach
    void tearDown() throws IOException {
        Files.deleteIfExists(docsFile);
    }

    LongArray createArray(long[] keys, long[] values) {
        return createArray(Arena.ofAuto(), keys, values);
    }

    LongArray createArray(Arena arena, long[] keys, long[] values) {
        assert keys.length == values.length;
        MemorySegment ms = arena.allocate(keys.length * 16);
        for (int i = 0; i < keys.length; i++) {
            ms.setAtIndex(ValueLayout.JAVA_LONG, 2L*i, keys[i]);
            ms.setAtIndex(ValueLayout.JAVA_LONG, 2L*i+1, values[i]);
        }
        return LongArrayFactory.wrap(ms);
    }

    @Test
    public void testTenBlocks() throws IOException {
        long[] keys = LongStream.range(0, 300).toArray();
        long[] vals = LongStream.range(0, 300).map(v -> -v).toArray();

        try (var writer = new SkipListWriter(docsFile)) {
            writer.writeList(createArray(keys, vals), 0, keys.length);
        }

        try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
            var reader = new SkipListReader(pool, 0);
            LongQueryBuffer lqb = new LongQueryBuffer(20);
            while (!reader.atEnd()) {
                System.out.println(reader.estimateSize());
                System.out.println(reader.getData(lqb));
                System.out.println(Arrays.toString(lqb.copyData()));
                lqb.zero();
            }
        }

        System.out.println("---");

        try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
            var reader = new SkipListReader(pool, 0);
            LongQueryBuffer lqb = new LongQueryBuffer(40);
            while (!reader.atEnd()) {
                System.out.println(reader.estimateSize());
                System.out.println(reader.getData(lqb));
                System.out.println(Arrays.toString(lqb.copyData()));
                if (!lqb.fitsMore()) {
                    lqb.zero();
                }
            }
        }
    }

    @Test
    public void testRetainTenBlocks() throws IOException {
        long[] keys = LongStream.range(0, 300).map(v -> 2*v).toArray();
        long[] vals = LongStream.range(0, 300).map(v -> -v).toArray();

        try (var writer = new SkipListWriter(docsFile)) {
            writer.writeList(createArray(keys, vals), 0, keys.length);
        }

        try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
            var reader = new SkipListReader(pool, 0);
            LongQueryBuffer lqb = new LongQueryBuffer(new long[] { 4, 5, 30, 39, 270, 300, 551 }, 7);
            reader.retainData(lqb);
            lqb.finalizeFiltering();
            System.out.println(Arrays.toString(lqb.copyData()));
        }
    }

    @Test
    public void testRetainFuzz() throws IOException {

        for (int seed = 0; seed < 100; seed++) {
            System.out.println("Seed: " + seed);

            Random r = new Random(seed);

            int nKeys = 8; r.nextInt(100, 1000); // (value discarded; only advances the RNG)
            LongSortedSet intersectionsSet = new LongAVLTreeSet();
            LongSortedSet keysSet = new LongAVLTreeSet();
            LongSortedSet qbSet = new LongAVLTreeSet();

            while (intersectionsSet.size() < 64) {
                long val = r.nextLong(0, 10_000);
                keysSet.add(val);
                qbSet.add(val);
                intersectionsSet.add(val);
            }
            while (keysSet.size() < nKeys) {
                long val = r.nextLong(0, 10_000);
                keysSet.add(val);
            }

            while (qbSet.size() < 512) {
                long val = r.nextLong(0, 10_000);
                if (keysSet.contains(val)) continue;

                qbSet.add(val);
            }

            long[] keys = keysSet.toLongArray();

            try (var writer = new SkipListWriter(docsFile);
                 Arena arena = Arena.ofConfined()
            ) {
                writer.writeList(createArray(arena, keys, keys), 0, keys.length);
            }

            try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
                var reader = new SkipListReader(pool, 0);
                LongQueryBuffer lqb = new LongQueryBuffer(qbSet.toLongArray(), qbSet.size());

                System.out.println("Keys: " + Arrays.toString(keysSet.toLongArray()));
                System.out.println("QB Input: " + Arrays.toString(qbSet.toLongArray()));

                reader.retainData(lqb);
                lqb.finalizeFiltering();
                long[] actual = lqb.copyData();
                long[] expected = intersectionsSet.toLongArray();

                System.out.println("Expected intersection: " + Arrays.toString(intersectionsSet.toLongArray()));
                System.out.println("Actual intersection: " + Arrays.toString(lqb.copyData()));
                Assertions.assertArrayEquals(expected, actual);
            }
        }
    }

    @Test
    public void testRetainFuzz1() throws IOException {

        long seedOffset = System.nanoTime();

        for (int seed = 0; seed < 100; seed++) {
            System.out.println("Seed: " + (seed + seedOffset));

            Random r = new Random(seed + seedOffset);

            LongSortedSet keyset = new LongAVLTreeSet();

            int nkeys = r.nextInt(SkipListConstants.BLOCK_SIZE/2, SkipListConstants.BLOCK_SIZE*4);
            while (keyset.size() < nkeys) {
                long val = r.nextLong(0, 10_000_000);

                keyset.add(val);
            }

            long[] keys = keyset.toLongArray();
            long[] qbs = new long[] { keys[r.nextInt(0, keys.length)] };

            long off = 0;
            try (var writer = new SkipListWriter(docsFile);
                 Arena arena = Arena.ofConfined()
            ) {
                writer.padDocuments(8*r.nextInt(0, SkipListConstants.BLOCK_SIZE/8));
                off = writer.writeList(createArray(arena, keys, keys), 0, keys.length);
            }

            try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
                var reader = new SkipListReader(pool, off);
                LongQueryBuffer lqb = new LongQueryBuffer(qbs, 1);

                reader.retainData(lqb);
                lqb.finalizeFiltering();
                long[] actual = lqb.copyData();
                long[] expected = qbs;

                System.out.println(Arrays.toString(expected));
                System.out.println(Arrays.toString(actual));
                Assertions.assertArrayEquals(expected, actual);
            }
        }
    }

    @Test
    public void testRejectFuzz1() throws IOException {

        long seedOffset = System.nanoTime();
        for (int seed = 0; seed < 100; seed++) {
            System.out.println("Seed: " + (seed + seedOffset));

            Random r = new Random(seed + seedOffset);

            LongSortedSet keyset = new LongAVLTreeSet();

            int nkeys = r.nextInt(SkipListConstants.BLOCK_SIZE/2, SkipListConstants.BLOCK_SIZE*4);
            while (keyset.size() < nkeys) {
                long val = r.nextLong(0, 10_000_000);

                keyset.add(val);
            }

            long[] keys = keyset.toLongArray();
            long[] qbs = new long[] { keys[r.nextInt(0, keys.length)] };

            long off = 0;
            try (var writer = new SkipListWriter(docsFile);
                 Arena arena = Arena.ofConfined()
            ) {
                writer.padDocuments(8*r.nextInt(0, SkipListConstants.BLOCK_SIZE/8));
                off = writer.writeList(createArray(arena, keys, keys), 0, keys.length);
            }

            try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
                var reader = new SkipListReader(pool, off);
                LongQueryBuffer lqb = new LongQueryBuffer(qbs, 1);

                reader.rejectData(lqb);
                lqb.finalizeFiltering();
                long[] actual = lqb.copyData();
                long[] expected = new long[0];

                System.out.println(Arrays.toString(expected));
                System.out.println(Arrays.toString(actual));
                Assertions.assertArrayEquals(expected, actual);
            }
        }
    }

    @Tag("slow")
    @Test
    public void testGetDataFuzz() throws IOException {

        for (int seed = 0; seed < 256; seed++) {
            System.out.println("Seed: " + seed);

            Random r = new Random(seed);

            int nKeys = 8; r.nextInt(100, 1000); // (value discarded; only advances the RNG)
            LongSortedSet intersectionsSet = new LongAVLTreeSet();
            LongSortedSet keysSet = new LongAVLTreeSet();
            LongSortedSet qbSet = new LongAVLTreeSet();

            while (intersectionsSet.size() < 64) {
                long val = r.nextLong(0, 10_000);
                keysSet.add(val);
                qbSet.add(val);
                intersectionsSet.add(val);
            }
            while (keysSet.size() < nKeys) {
                long val = r.nextLong(0, 10_000);
                keysSet.add(val);
            }

            while (qbSet.size() < 512) {
                long val = r.nextLong(0, 10_000);
                if (keysSet.contains(val)) continue;

                qbSet.add(val);
            }

            long[] keys = keysSet.toLongArray();

            long blockStart;
            try (var writer = new SkipListWriter(docsFile);
                 Arena arena = Arena.ofConfined()
            ) {
                writer.padDocuments(r.nextInt(0, 4096/8) * 8);
                blockStart = writer.writeList(createArray(arena, keys, keys), 0, keys.length);
            }

            try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
                var reader = new SkipListReader(pool, blockStart);
                try (var page = pool.get(blockStart & -SkipListConstants.BLOCK_SIZE)) {
                    // intra-block offset, as in parseBlocks(BufferPool, long)
                    SkipListReader.parseBlock(page.getMemorySegment(), (int) (blockStart & (SkipListConstants.BLOCK_SIZE - 1)));
                }

                long[] queryKeys = qbSet.toLongArray();
                long[] queryVals = reader.getValueOffsets(queryKeys);

                LongSortedSet presentValues = new LongAVLTreeSet();
                for (int i = 0; i < queryKeys.length; i++) {
                    if (queryVals[i] != 0) {
                        presentValues.add(queryKeys[i]);
                    }
                }

                System.out.println("Keys: " + Arrays.toString(keysSet.toLongArray()));
                System.out.println("QB Input: " + Arrays.toString(qbSet.toLongArray()));

                long[] actual = presentValues.toLongArray();
                long[] expected = intersectionsSet.toLongArray();

                System.out.println("Expected intersection: " + Arrays.toString(intersectionsSet.toLongArray()));
                System.out.println("Actual intersection: " + Arrays.toString(presentValues.toLongArray()));
                Assertions.assertArrayEquals(expected, actual);
            }
        }
    }

    @Test
    @Tag("slow")
    public void testParseFuzz() throws IOException {

        long seedOffset = System.nanoTime();
        for (int seed = 0; seed < 100; seed++) {
            System.out.println("Seed: " + (seed + seedOffset));

            Random r = new Random(seed);

            List<long[]> keysForBlocks = new ArrayList<>();

            for (int i = 0; i < 1000; i++) {

                int nVals = r.nextInt(8, SkipListConstants.MAX_RECORDS_PER_BLOCK);
                long[] keys = new long[nVals];
                for (int ki = 0; ki < keys.length; ki++) {
                    keys[ki] = r.nextLong(0, Long.MAX_VALUE);
                }

                Arrays.sort(keys);
                keysForBlocks.add(keys);
            }
            List<Long> offsets = new ArrayList<>();
            try (var writer = new SkipListWriter(docsFile);
                 Arena arena = Arena.ofConfined()
            ) {
                writer.padDocuments(r.nextInt(0, SkipListConstants.BLOCK_SIZE/8) * 8);
                for (var block : keysForBlocks) {
                    offsets.add(writer.writeList(createArray(arena, block, block), 0, block.length));
                }
            }

            try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
                for (var offset : offsets) {
                    var reader = new SkipListReader(pool, offset);
                    reader.parseBlocks(pool, offset);
                }
            }
        }
    }

    @Test
    public void testGetValueOffsets() throws IOException {
        long[] keys = LongStream.range(0, 300).map(v -> 2*v).toArray();
        long[] vals = LongStream.range(0, 300).map(v -> -2*v).toArray();

        try (var writer = new SkipListWriter(docsFile)) {
            writer.writeList(createArray(keys, vals), 0, keys.length);
        }

        try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
            var reader = new SkipListReader(pool, 0);
            long[] queryKeys = new long[] { 4, 5, 30, 39, 270, 300, 551 };
            long[] queryVals = reader.getValueOffsets(queryKeys);
            System.out.println(Arrays.toString(queryVals));
        }
    }

    @Test
    public void getData2() throws IOException {
        long[] keys = new long[] { 100, 101 };
        long[] vals = new long[] { 50, 51 };

        long pos = 0;
        try (var writer = new SkipListWriter(docsFile)) {
            pos = writer.writeList(createArray(keys, vals), 0, keys.length);
            writer.writeList(createArray(keys, vals), 0, keys.length);
        }

        try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
            var reader = new SkipListReader(pool, pos);
            LongQueryBuffer lqb = new LongQueryBuffer(4);
            reader.getData(lqb);
            System.out.println(Arrays.toString(lqb.copyData()));
        }
    }

    @Test
    public void testWtf() {
        LongArrayList vals = new LongArrayList();
        for (int i = 0; i < 255; i++) {
            vals.add(i);
            vals.add(-i);
        }

        try (LongArray array = LongArrayFactory.onHeapConfined(4096);
             var writer = new SkipListWriter(docsFile)) {
            writer.padDocuments(4104);
            for (int i = 0; i < vals.size(); i++) {
                array.set(i, vals.getLong(i));
            }

            long pos = writer.writeList(array, 513, 255*2);

            System.out.println(pos);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
            var reader = new SkipListReader(pool, 4104);
            long[] queryKeys = new long[] { 100 };
            var lqb = new LongQueryBuffer(32);
            reader.getData(lqb);
            System.out.println(Arrays.toString(lqb.copyData()));
        }
    }

    @Test
    public void testGetValueOffsets1() throws IOException {
        long[] keys = new long[] { 100 };
        long[] vals = new long[] { 50 };

        long pos = 0;
        try (var writer = new SkipListWriter(docsFile)) {
            pos = writer.writeList(createArray(keys, vals), 0, keys.length);
            writer.writeList(createArray(keys, vals), 0, keys.length);
        }

        try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
            var reader = new SkipListReader(pool, pos);
            long[] queryKeys = new long[] { 100 };
            long[] queryVals = reader.getValueOffsets(queryKeys);
            System.out.println(Arrays.toString(queryVals));
        }
    }

    @Test
    public void testRejectTenBlocks() throws IOException {
        long[] keys = LongStream.range(0, 300).map(v -> 2*v).toArray();
        long[] vals = LongStream.range(0, 300).map(v -> -v).toArray();

        try (var writer = new SkipListWriter(docsFile)) {
            writer.writeList(createArray(keys, vals), 0, keys.length);
        }

        try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
            var reader = new SkipListReader(pool, 0);
            LongQueryBuffer lqb = new LongQueryBuffer(new long[] { 4, 5, 30, 39, 270, 300, 551 }, 7);
            reader.rejectData(lqb);
            System.out.println(Arrays.toString(lqb.copyData()));
        }
    }

    @Test
    void retainInPage() {
        long[] keys = new long[] { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 67108964, 67108966, 67108968, 67108970, 67108972, 67108974, 67108976, 67108978, 67108980, 67108982, 67108984, 67108986, 67108988, 67108990, 67108992, 67108994, 67108996, 67108998, 67109000, 67109002, 67109004, 67109006, 67109008, 67109010, 67109012, 67109014, 67109016, 67109018, 67109020, 67109022, 67109024, 67109026, 67109028, 67109030, 67109032, 67109034, 67109036, 67109038, 67109040, 67109042, 67109044, 67109046, 67109048, 67109050, 67109052, 67109054, 67109056, 67109058, 67109060, 67109062, 134217928, 134217930, 134217932, 134217934, 134217936, 134217938, 134217940, 134217942, 134217944, 134217946, 134217948, 134217950, 134217952, 134217954, 134217956, 134217958, 134217960, 134217962, 134217964, 134217966, 134217968, 134217970, 134217972, 134217974, 134217976, 134217978, 134217980, 134217982, 134217984, 134217986, 134217988, 134217990, 134217992, 134217994, 134217996, 134217998, 134218000, 134218002, 134218004, 134218006, 134218008, 134218010, 134218012, 134218014, 134218016, 134218018, 134218020, 134218022, 134218024, 134218026, 201326892, 201326894, 201326896, 201326898, 201326900, 201326902, 201326904, 201326906, 201326908, 201326910, 201326912, 201326914, 201326916, 201326918, 201326920, 201326922, 201326924, 201326926, 201326928, 201326930, 201326932, 201326934, 201326936, 201326938, 201326940, 201326942, 201326944, 201326946, 201326948, 201326950, 201326952, 201326954, 201326956, 201326958, 201326960, 201326962, 201326964, 201326966, 201326968, 201326970, 201326972, 201326974, 201326976, 201326978, 201326980, 201326982, 201326984, 201326986, 201326988, 201326990, 268435856, 268435858, 268435860, 268435862, 268435864, 268435866, 268435868, 268435870, 268435872, 268435874, 268435876, 268435878, 268435880, 268435882, 268435884, 268435886, 268435888, 268435890, 268435892, 268435894, 268435896, 268435898, 268435900, 268435902, 268435904, 268435906, 268435908, 268435910, 268435912, 268435914, 268435916, 268435918, 268435920, 268435922, 268435924, 268435926, 268435928, 268435930, 268435932, 268435934, 268435936, 268435938, 268435940, 268435942, 268435944, 268435946, 268435948, 268435950, 268435952, 268435954, 335544820, 335544822, 335544824, 335544826, 335544828, 335544830 };
        long[] vals = Arrays.copyOf(keys, keys.length);
        long[] qbdata = new long[] { 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 67108964, 67108969, 67108974, 67108979, 67108984, 67108989, 67108994, 67108999, 67109004, 67109009, 67109014, 67109019, 67109024, 67109029, 67109034, 67109039, 67109044, 67109049, 67109054, 67109059, 134217928, 134217933, 134217938, 134217943, 134217948, 134217953, 134217958, 134217963, 134217968, 134217973, 134217978, 134217983, 134217988, 134217993, 134217998, 134218003, 134218008, 134218013, 134218018, 134218023, 201326892, 201326897, 201326902, 201326907, 201326912, 201326917, 201326922, 201326927, 201326932, 201326937, 201326942, 201326947, 201326952, 201326957, 201326962, 201326967, 201326972, 201326977, 201326982, 201326987, 268435856, 268435861, 268435866, 268435871, 268435876, 268435881, 268435886, 268435891, 268435896, 268435901, 268435906, 268435911, 268435916, 268435921, 268435926, 268435931, 268435936, 268435941, 268435946, 268435951, 335544820, 335544825, 335544830 };

        try (var writer = new SkipListWriter(docsFile)) {
            writer.writeList(createArray(keys, vals), 0, keys.length);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        try (var pool = new BufferPool(docsFile, SkipListConstants.BLOCK_SIZE, 8)) {
            var reader = new SkipListReader(pool, 0);
            var qb = new LongQueryBuffer(qbdata, qbdata.length);
            reader.retainData(qb);
            System.out.println(Arrays.toString(qb.copyData()));
        }
    }
}
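Both test files decode blocks through parseBlock, whose 8-byte header writeCompactBlockHeader lays out as: record count (int) at offset 0, forward-pointer count (byte) at offset 4, flags (byte) at offset 5, then two bytes of padding. A small decoding sketch, assuming that layout (HEADER_SIZE itself is defined in SkipListConstants and is not restated here):

import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;

record BlockHeader(int numRecords, int forwardCount, int flags) {
    static BlockHeader read(MemorySegment block, long offset) {
        return new BlockHeader(
                block.get(ValueLayout.JAVA_INT, offset),       // record count
                block.get(ValueLayout.JAVA_BYTE, offset + 4),  // forward pointer count
                block.get(ValueLayout.JAVA_BYTE, offset + 5)); // flags, e.g. FLAG_END_BLOCK
    }
}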
@@ -0,0 +1,378 @@
package nu.marginalia.skiplist;

import it.unimi.dsi.fastutil.longs.LongAVLTreeSet;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import it.unimi.dsi.fastutil.longs.LongSortedSet;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Random;
import java.util.stream.LongStream;

import static org.junit.jupiter.api.Assertions.assertEquals;

class SkipListWriterTest {
    Path docsFile;

    @BeforeEach
    void setUp() throws IOException {
        docsFile = Files.createTempFile(SkipListWriterTest.class.getSimpleName(), ".docs.dat");
    }

    @AfterEach
    void tearDown() throws IOException {
        Files.deleteIfExists(docsFile);
    }

    LongArray createArray(long[] keys, long[] values) {
        assert keys.length == values.length;
        MemorySegment ms = Arena.ofAuto().allocate(keys.length * 16);
        for (int i = 0; i < keys.length; i++) {
            ms.setAtIndex(ValueLayout.JAVA_LONG, 2L*i, keys[i]);
            ms.setAtIndex(ValueLayout.JAVA_LONG, 2L*i+1, values[i]);
        }
        return LongArrayFactory.wrap(ms);
    }

    LongArray createArray(Arena arena, long[] keys, long[] values) {
        assert keys.length == values.length;
        MemorySegment ms = arena.allocate(keys.length * 16);
        for (int i = 0; i < keys.length; i++) {
            ms.setAtIndex(ValueLayout.JAVA_LONG, 2L*i, keys[i]);
            ms.setAtIndex(ValueLayout.JAVA_LONG, 2L*i+1, values[i]);
        }
        return LongArrayFactory.wrap(ms);
    }

    @Test
    public void testWriteSingleBlock() throws IOException {
        long pos1, pos2;
        try (var writer = new SkipListWriter(docsFile)) {
            pos1 = writer.writeList(
                    createArray(new long[] {0,1,2,3,4,5,6,7}, new long[] { -0,-1,-2,-3,-4,-5,-6,-7}), 0, 8);
            pos2 = writer.writeList(
                    createArray(new long[] {0,1,2,3}, new long[] { -0,-1,-2,-3}), 4, 2);
        }

        System.out.println(pos1);
        System.out.println(pos2);

        try (var arr = LongArrayFactory.mmapForReadingConfined(docsFile)) {
            var ms = arr.getMemorySegment();

            var actual1 = SkipListReader.parseBlock(ms, (int) pos1);
            var expected1 = new SkipListReader.RecordView(8, 0, SkipListConstants.FLAG_END_BLOCK,
                    new LongArrayList(),
                    new LongArrayList(new long[] { 0,1,2,3,4,5,6,7})
            );

            System.out.println(actual1);
            System.out.println(expected1);
            assertEquals(expected1, actual1);

            var actual2 = SkipListReader.parseBlock(ms, (int) pos2);
            var expected2 = new SkipListReader.RecordView(2, 0, SkipListConstants.FLAG_END_BLOCK,
                    new LongArrayList(),
                    new LongArrayList(new long[] { 2,3}));

            System.out.println(actual2);
            System.out.println(expected2);
            assertEquals(expected2, actual2);
        }
    }

    @Test
    public void testTwoBlocks() throws IOException {
        long pos1;
        long[] keys = LongStream.range(0, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32) * 2).toArray();
        long[] vals = LongStream.range(0, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32) * 2).map(v -> -v).toArray();

        try (var writer = new SkipListWriter(docsFile)) {
            pos1 = writer.writeList(createArray(keys, vals), 0, keys.length);
        }

        System.out.println(pos1);

        try (var arr = LongArrayFactory.mmapForReadingConfined(docsFile)) {
            LongArrayList allDocIds = new LongArrayList();
            LongArrayList allValues = new LongArrayList();

            var blocks = SkipListReader.parseBlocks(arr.getMemorySegment(), 0);

            for (var block : blocks) {
                System.out.println(block);
            }

            assertEquals(2, blocks.size());

            for (var block : blocks) {
                allDocIds.addAll(block.docIds());
            }

            LongList expectedAllDocIds = new LongArrayList(keys);
            LongList expectedAllValues = new LongArrayList(); // parseBlock does not capture values

            Assertions.assertEquals(expectedAllDocIds, allDocIds);
            Assertions.assertEquals(expectedAllValues, allValues);

            var rootBlock = blocks.getFirst();
            var secondBlock = blocks.get(1);

            LongList actualFp = rootBlock.fowardPointers();
            LongList expectedFp = new LongArrayList(new long[]{secondBlock.highestDocId()});

            Assertions.assertEquals(expectedFp, actualFp);
        }
    }

    @Test
    public void testTenBlocks() throws IOException {
        long pos1;
        long[] keys = LongStream.range(0, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32)*10).toArray();
        long[] vals = LongStream.range(0, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32)*10).map(v -> -v).toArray();

        try (var writer = new SkipListWriter(docsFile)) {
            pos1 = writer.writeList(createArray(keys, vals), 0, keys.length);
        }

        System.out.println(pos1);

        try (var arr = LongArrayFactory.mmapForReadingConfined(docsFile)) {
            LongArrayList allDocIds = new LongArrayList();
            LongArrayList allValues = new LongArrayList();

            var blocks = SkipListReader.parseBlocks(arr.getMemorySegment(), 0);

            for (var block : blocks) {
                System.out.println(block);
            }

            assertEquals(10, blocks.size());

            for (var block : blocks) {
                allDocIds.addAll(block.docIds());
            }

            LongList expectedAllDocIds = new LongArrayList(keys);
            LongList expectedAllValues = new LongArrayList(); // parseBlock does not capture values

            Assertions.assertEquals(expectedAllDocIds, allDocIds);
            Assertions.assertEquals(expectedAllValues, allValues);

            for (int i = 0; i < blocks.size(); i++) {
                SkipListReader.RecordView block = blocks.get(i);
                for (int fci = 0; fci < block.fc(); fci++) {
                    int skipOffset = SkipListConstants.skipOffsetForPointer(fci);
                    Assertions.assertTrue(i + skipOffset < blocks.size());
                    Assertions.assertEquals(block.fowardPointers().getLong(fci), blocks.get(i+skipOffset).highestDocId());
                }
            }
        }

    }

    @Test
    public void testTenBlockFps() throws IOException {
        long pos1;
        long[] keys = LongStream.range(0, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32)*10).toArray();
        long[] vals = LongStream.range(0, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32)*10).map(v -> -v).toArray();

        try (var writer = new SkipListWriter(docsFile)) {
            pos1 = writer.writeList(createArray(keys, vals), 0, keys.length);
        }

        System.out.println(pos1);

        try (var arr = LongArrayFactory.mmapForReadingConfined(docsFile)) {

            var blocks = SkipListReader.parseBlocks(arr.getMemorySegment(), 0);
            System.out.println(blocks);
            for (int i = 0; i + 1 < blocks.size(); i++) {
                if (blocks.get(i).fowardPointers().isEmpty()) {
                    continue;
                }
                var actual = blocks.get(i).fowardPointers().getFirst();
                var expected = blocks.get(i+1).docIds().getLast();
                assertEquals(expected, actual);
            }
        }
    }

    @Test
    public void testTenBlockFpsPadded() throws IOException {
        long pos1;
        long[] keys = LongStream.range(0, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32)*10).toArray();
        long[] vals = LongStream.range(0, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32)*10).map(v -> -v).toArray();

        try (var writer = new SkipListWriter(docsFile)) {
            writer.padDocuments(64);
            pos1 = writer.writeList(createArray(keys, vals), 0, keys.length);
        }

        try (var arr = LongArrayFactory.mmapForReadingConfined(docsFile)) {

            var blocks = SkipListReader.parseBlocks(arr.getMemorySegment(), 0);
            for (int i = 0; i + 1 < blocks.size(); i++) {
                if (blocks.get(i).fowardPointers().isEmpty()) {
                    continue;
                }
                var actual = blocks.get(i).fowardPointers().getFirst();
                var expected = blocks.get(i+1).docIds().getLast();
                System.out.println(actual + " vs " + expected);
                assertEquals(expected, actual);
            }
        }
    }

    @Test
    public void testFpFuzz() throws IOException {

        long seedOffset = System.nanoTime();

        for (int seed = 0; seed < 100; seed++) {
            System.out.println("Seed: " + (seed + seedOffset));

            Random r = new Random(seed + seedOffset);

            LongSortedSet keyset = new LongAVLTreeSet();

            int nkeys = r.nextInt(SkipListConstants.BLOCK_SIZE/2, SkipListConstants.BLOCK_SIZE*4);
            while (keyset.size() < nkeys) {
                long val = r.nextLong(0, 10_000_000);

                keyset.add(val);
            }

            long[] keys = keyset.toLongArray();
            long[] qbs = new long[] { keys[r.nextInt(0, keys.length)] };

            long off = 0;
            try (var writer = new SkipListWriter(docsFile);
                 Arena arena = Arena.ofConfined()
            ) {
                writer.padDocuments(8*r.nextInt(0, SkipListConstants.BLOCK_SIZE/8));
                off = writer.writeList(createArray(arena, keys, keys), 0, keys.length);
            }

            try (var arr = LongArrayFactory.mmapForReadingConfined(docsFile)) {

                var blocks = SkipListReader.parseBlocks(arr.getMemorySegment(), 0);
                for (int i = 0; i + 1 < blocks.size(); i++) {
                    if (blocks.get(i).fowardPointers().isEmpty()) {
                        continue;
                    }
                    var actual = blocks.get(i).fowardPointers().getFirst();
                    var expected = blocks.get(i+1).docIds().getLast();
                    System.out.println(actual + " vs " + expected);
                    assertEquals(expected, actual);
                }
            }
        }
    }

    @Test
    public void testTenBlocksReadOffset() throws IOException {
        long pos1;

        long[] readKeys = LongStream.range(-2, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32)*10).toArray();
        long[] readVals = LongStream.range(-2, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32)*10).map(v -> -v).toArray();

        long[] expectedKeys = LongStream.range(0, (SkipListConstants.MAX_RECORDS_PER_BLOCK-32)*10).toArray();
        try (var writer = new SkipListWriter(docsFile)) {
            pos1 = writer.writeList(createArray(readKeys, readVals), 4, expectedKeys.length);
        }

        System.out.println(pos1);

        try (var arr = LongArrayFactory.mmapForReadingConfined(docsFile)) {
            LongArrayList allDocIds = new LongArrayList();
            LongArrayList allValues = new LongArrayList();

            var blocks = SkipListReader.parseBlocks(arr.getMemorySegment(), 0);

            for (var block : blocks) {
                System.out.println(block);
            }

            assertEquals(10, blocks.size());

            for (var block : blocks) {
                allDocIds.addAll(block.docIds());
            }

            LongList expectedAllDocIds = new LongArrayList(expectedKeys);
            LongList expectedAllValues = new LongArrayList();

            Assertions.assertEquals(expectedAllDocIds, allDocIds);
            Assertions.assertEquals(expectedAllValues, allValues);
|
||||
|
||||
for (int i = 0; i < blocks.size(); i++) {
|
||||
SkipListReader.RecordView block = blocks.get(i);
|
||||
for (int fci = 0; fci < block.fc(); fci++) {
|
||||
int skipOffset = SkipListConstants.skipOffsetForPointer(fci);
|
||||
Assertions.assertTrue(i + skipOffset < blocks.size());
|
||||
Assertions.assertEquals(block.fowardPointers().getLong(fci), blocks.get(i+skipOffset).highestDocId());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@Test
|
||||
public void testSkipOffsetForPointer() {
|
||||
for (int i = 0; i < 64; i++) {
|
||||
System.out.println(i + ":" + SkipListConstants.skipOffsetForPointer(i));
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void testNumPointersForBlock() {
|
||||
for (int i = 1; i < 64; i++) {
|
||||
System.out.println(i + ":" + SkipListConstants.numPointersForBlock(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonRootBlockCapacity() {
|
||||
for (int i = 1; i < 64; i++) {
|
||||
System.out.println(i + ":" + SkipListConstants.nonRootBlockCapacity(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEstimateNumBlocks() {
|
||||
for (int i = 1; i < 1024; i++) {
|
||||
System.out.println(i + ":" + SkipListConstants.estimateNumBlocks(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNumPointersForRootBlock() {
|
||||
for (int i = 1; i < 1024; i++) {
|
||||
System.out.println(i + ":" + SkipListConstants.estimateNumBlocks(i) + ":" + SkipListConstants.numPointersForRootBlock(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void calculateNumBlocks() {
|
||||
for (int i = 1; i < 1024; i++) {
|
||||
System.out.println(i + ":" + SkipListWriter.calculateActualNumBlocks(2048, i) + ":" + SkipListConstants.estimateNumBlocks(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void calculateNumBlocks2() {
|
||||
System.out.println(SkipListWriter.calculateActualNumBlocks(2048,1));
|
||||
}
|
||||
}
|
code/libraries/native/Makefile (new executable file, 20 lines)
@@ -0,0 +1,20 @@
#!/usr/bin/env sh

CXXFLAGS=-O3 -march=native -std=c++14 -fPIC `pkg-config --cflags liburing`
LDFLAGS=

# Weird hack to get liburing to link on one particular debian server
LIBURING_PATH=`pkg-config liburing --keep-system-libs --libs-only-L | cut -c 3- | tr -d \ `/liburing.so

CXX=c++

SOURCES=src/sort.cc src/unix.cc src/uring.cc

all: resources/libcpp.so

resources/libcpp.so: ${SOURCES} resources/liburing.so
	${CXX} -shared ${CXXFLAGS} ${SOURCES} resources/liburing.so -o resources/libcpp.so
resources/liburing.so:
	cp ${LIBURING_PATH} resources/liburing.so
clean:
	rm -rf resources/{libcpp,liburing}.so
code/libraries/native/build.gradle (new file, 36 lines)
@@ -0,0 +1,36 @@
plugins {
    id 'java'
}

java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}

dependencies {
    implementation libs.bundles.slf4j
    implementation libs.fastutil

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
}

apply from: "$rootProject.projectDir/srcsets.gradle"

// We use a custom task to compile the C++ code into a shared library
// with a shellscript as gradle's c++ tasks are kind of insufferable

tasks.register('compileCpp', Exec) {
    inputs.files('Makefile', 'src/sort.cc', 'src/unix.cc', 'src/uring.cc')
    outputs.files('resources/libcpp.so', 'resources/liburing.so')

    commandLine 'make', 'all'
}
tasks.register('cleanCpp', Exec) {
    commandLine 'make', 'clean'
}

processResources.dependsOn('compileCpp')
clean.dependsOn('cleanCpp')
code/libraries/native/java/nu/marginalia/asyncio/AsyncReadRequest.java (new file, 6 lines)
@@ -0,0 +1,6 @@
package nu.marginalia.asyncio;

import java.lang.foreign.MemorySegment;

public record AsyncReadRequest(int fd, MemorySegment destination, long offset) {
}
code/libraries/native/java/nu/marginalia/asyncio/SubmittedReadRequest.java (new file, 55 lines)
@@ -0,0 +1,55 @@
package nu.marginalia.asyncio;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.CompletableFuture;

final class SubmittedReadRequest<T> {

    public final long id;

    private final T context;
    private final List<AsyncReadRequest> requests;
    private final CompletableFuture<T> future;
    private int count;
    private volatile boolean success = true;

    SubmittedReadRequest(T context, List<AsyncReadRequest> requests, CompletableFuture<T> future, long id) {
        this.context = context;
        this.requests = requests;
        this.future = future;
        this.id = id;
        this.count = requests.size();
    }

    public List<AsyncReadRequest> getRequests() {
        return requests;
    }

    public int count() {
        return count;
    }

    public void canNotFinish() {
        success = false;
        count = 0;
        future.completeExceptionally(new IOException());
    }

    public boolean partFinished(boolean successfully) {
        if (!successfully) {
            success = false;
        }

        if (--count == 0) {
            if (success) {
                future.complete(context);
            } else {
                future.completeExceptionally(new IOException());
            }
            return true;
        }
        return false;
    }

}
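The class above is in effect a countdown latch over a batch of reads: each completion decrements the count, and the future resolves once every part has reported in, exceptionally if any part failed. A minimal sketch of that contract (not from the commit; req1 and req2 are hypothetical AsyncReadRequests, and the class is package-private, so this only compiles inside nu.marginalia.asyncio):

// Sketch of the countdown contract; req1/req2 are hypothetical AsyncReadRequests
var future = new CompletableFuture<String>();
var batch = new SubmittedReadRequest<>("ctx", List.of(req1, req2), future, 42L);
batch.partFinished(true);                 // one part of two done; future still pending
boolean done = batch.partFinished(true);  // true: last part done, future completes with "ctx"
assert done && "ctx".equals(future.join());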
code/libraries/native/java/nu/marginalia/asyncio/UringExecutionQueue.java (new file, 243 lines)
@@ -0,0 +1,243 @@
package nu.marginalia.asyncio;

import nu.marginalia.ffi.IoUring;

import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicLong;

import static java.lang.foreign.ValueLayout.*;

public class UringExecutionQueue implements AutoCloseable {
    private static final IoUring ioUringInstance = IoUring.instance();

    private final AtomicLong requestIdCounter = new AtomicLong(1);
    private final int queueSize;

    private final Thread executor;
    private volatile boolean running = true;
    private final MemorySegment uringQueue;

    private final ArrayBlockingQueue<SubmittedReadRequest<? extends Object>> inputQueue;

    public UringExecutionQueue(int queueSize) throws Throwable {
        this.inputQueue = new ArrayBlockingQueue<>(queueSize, false);
        this.queueSize = queueSize;
        this.uringQueue = (MemorySegment) ioUringInstance.uringInit.invoke(queueSize);

        executor = Thread.ofPlatform().daemon().start(this::executionPipe);
    }

    public void close() throws InterruptedException {
        running = false;
        executor.join();

        try {
            ioUringInstance.uringClose.invoke(uringQueue);
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
    }

    public <T> CompletableFuture<T> submit(T context, List<AsyncReadRequest> relatedRequests) throws InterruptedException {
        if (relatedRequests.size() > queueSize) {
            throw new IllegalArgumentException("Request batches may not exceed the queue size!");
        }
        long id = requestIdCounter.incrementAndGet();
        CompletableFuture<T> future = new CompletableFuture<>();
        inputQueue.put(new SubmittedReadRequest<>(context, relatedRequests, future, id));

        return future;
    }

    static class UringDispatcher implements AutoCloseable {
        private final Arena arena;

        private final MemorySegment returnResultIds;
        private final MemorySegment readBatchIds;
        private final MemorySegment readFds;
        private final MemorySegment readBuffers;
        private final MemorySegment readSizes;
        private final MemorySegment readOffsets;
        private final MemorySegment uringQueue;

        private int requestsToSend = 0;

        UringDispatcher(int queueSize, MemorySegment uringQueue) {
            this.uringQueue = uringQueue;
            this.arena = Arena.ofConfined();

            returnResultIds = arena.allocate(JAVA_LONG, queueSize);
            readBatchIds = arena.allocate(JAVA_LONG, queueSize);
            readFds = arena.allocate(JAVA_INT, queueSize);
            readBuffers = arena.allocate(ADDRESS, queueSize);
            readSizes = arena.allocate(JAVA_INT, queueSize);
            readOffsets = arena.allocate(JAVA_LONG, queueSize);
        }

        void prepareRead(int fd, long batchId, MemorySegment segment, int size, long offset) {
            readFds.setAtIndex(JAVA_INT, requestsToSend, fd);
            readBuffers.setAtIndex(ADDRESS, requestsToSend, segment);
            readBatchIds.setAtIndex(JAVA_LONG, requestsToSend, batchId);
            readSizes.setAtIndex(JAVA_INT, requestsToSend, size);
            readOffsets.setAtIndex(JAVA_LONG, requestsToSend, offset);
            requestsToSend++;
        }

        long[] poll() {
            try {
                // Dispatch call
                int result = (Integer) IoUring.instance.uringJustPoll.invoke(uringQueue, returnResultIds);

                if (result < 0) {
                    throw new IOException("Error in io_uring");
                }
                else {
                    long[] ret = new long[result];
                    for (int i = 0; i < result; i++) {
                        ret[i] = returnResultIds.getAtIndex(JAVA_LONG, i);
                    }
                    return ret;
                }

            }
            catch (Throwable e) {
                throw new RuntimeException(e);
            }
            finally {
                requestsToSend = 0;
            }
        }
        long[] dispatchRead(int ongoingRequests) throws IOException {
            try {
                // Dispatch call
                int result = (Integer) IoUring.instance.uringReadAndPoll.invoke(
                        uringQueue,
                        returnResultIds,
                        ongoingRequests,
                        requestsToSend,
                        readBatchIds,
                        readFds,
                        readBuffers,
                        readSizes,
                        readOffsets
                );

                if (result < 0) {
                    throw new IOException("Error in io_uring");
                }
                else {
                    long[] ret = new long[result];
                    for (int i = 0; i < result; i++) {
                        ret[i] = returnResultIds.getAtIndex(JAVA_LONG, i);
                    }
                    return ret;
                }

            }
            catch (Throwable e) {
                throw new RuntimeException(e);
            }
            finally {
                requestsToSend = 0;
            }
        }

        int getRequestsToSend() {
            return requestsToSend;
        }

        public void close() {
            arena.close();
        }
    }

    public void executionPipe() {
        try (var uringDispatcher = new UringDispatcher(queueSize, uringQueue)) {
            int ongoingRequests = 0;

            // recycle between iterations to avoid allocation churn
            List<SubmittedReadRequest<?>> batchesToSend = new ArrayList<>();

            Map<Long, SubmittedReadRequest<?>> requestsToId = new HashMap<>();

            while (running) {
                batchesToSend.clear();

//                if (inputQueue.isEmpty() && ongoingRequests == 0) {
//                    LockSupport.parkNanos(10_000);
//                    continue;
//                }

                int remainingRequests = queueSize - ongoingRequests;

                SubmittedReadRequest<?> request;

                // Find batches to send that will not exceed the queue size
                while ((request = inputQueue.peek()) != null) {
                    if (remainingRequests >= request.count()) {
                        remainingRequests -= request.count();
                        inputQueue.poll();

                        batchesToSend.add(request);
                    }
                    else {
                        break;
                    }
                }

                // Arrange requests from the batches into arrays to send to FFI call

                int requestsToSend = 0;
                for (var batch : batchesToSend) {
                    requestsToId.put(batch.id, batch);

                    for (var read : batch.getRequests()) {
                        uringDispatcher.prepareRead(read.fd(), batch.id, read.destination(), (int) read.destination().byteSize(), read.offset());
                    }
                }

                try {
                    ongoingRequests += uringDispatcher.getRequestsToSend();

                    long[] results;
                    if (uringDispatcher.getRequestsToSend() > 0) {
                        results = uringDispatcher.dispatchRead(ongoingRequests);
                    }
                    else {
                        results = uringDispatcher.poll();
                    }

                    for (long id : results) {
                        requestsToId.computeIfPresent(Math.abs(id), (_, req) -> {
                            if (req.partFinished(id > 0)) {
                                return null;
                            } else {
                                return req;
                            }
                        });
                        ongoingRequests--;
                    }
                }
                catch (IOException ex) {
                    ongoingRequests -= requestsToSend;
                    batchesToSend.forEach(req -> {
                        req.canNotFinish();
                        requestsToId.remove(req.id);
                    });
                }
                catch (Throwable ex) {
                    throw new RuntimeException(ex);
                }
            }
        }
    }

}
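A minimal usage sketch (not part of the commit), assuming the native library is built and /tmp/example.dat is a hypothetical readable file; since the constructor is declared to throw Throwable, the sketch lives in a method that does the same. A batch of two reads is submitted against one file descriptor, and the returned future completes once both reads have landed in their buffers:

// Sketch: submit one two-read batch and block until it completes
int fd = LinuxSystemCalls.openBuffered(Path.of("/tmp/example.dat")); // hypothetical input file
try (var queue = new UringExecutionQueue(64);
     Arena arena = Arena.ofShared()) {
    var r1 = new AsyncReadRequest(fd, arena.allocate(4096), 0);
    var r2 = new AsyncReadRequest(fd, arena.allocate(4096), 8192);
    // the context object ("batch-1") is handed back when the whole batch completes
    queue.submit("batch-1", List.of(r1, r2)).join();
}
finally {
    LinuxSystemCalls.closeFd(fd);
}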
code/libraries/native/java/nu/marginalia/ffi/IoUring.java (new file, 175 lines)
@@ -0,0 +1,175 @@
package nu.marginalia.ffi;

import nu.marginalia.uring.UringQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileOutputStream;
import java.lang.foreign.*;
import java.lang.invoke.MethodHandle;
import java.nio.file.Path;
import java.util.List;

import static java.lang.foreign.ValueLayout.*;

/** This class provides access to wrappers around Linux system calls.
 * <p></p>
 * isAvailable is a boolean flag that indicates whether the native
 * implementations are available.  If the shared library cannot be loaded,
 * isAvailable will be false.  This flag must be checked before calling
 * any of the native functions.
 * */
@SuppressWarnings("preview")
public class IoUring {
    public final MethodHandle uringInit;
    public final MethodHandle uringClose;
    private final MethodHandle uringReadBuffered;
    private final MethodHandle uringReadDirect;
    public final MethodHandle uringReadAndPoll;
    public final MethodHandle uringJustPoll;

    public static final IoUring instance;

    /** Indicates whether the native implementations are available */
    public static final boolean isAvailable;

    private static final Logger logger = LoggerFactory.getLogger(IoUring.class);

    private IoUring(Path libFile) {
        SymbolLookup libraryLookup = SymbolLookup.libraryLookup(libFile, Arena.global());
        var nativeLinker = Linker.nativeLinker();
        MemorySegment handle = libraryLookup.findOrThrow("uring_read_buffered");
        uringReadBuffered = nativeLinker.downcallHandle(handle, FunctionDescriptor.of(JAVA_INT, JAVA_INT, ADDRESS, JAVA_INT, ADDRESS, ADDRESS, ADDRESS));

        handle = libraryLookup.findOrThrow("uring_read_direct");
        uringReadDirect = nativeLinker.downcallHandle(handle, FunctionDescriptor.of(JAVA_INT, JAVA_INT, ADDRESS, JAVA_INT, ADDRESS, ADDRESS, ADDRESS));

        handle = libraryLookup.findOrThrow("uring_read_submit_and_poll");

        uringReadAndPoll = nativeLinker.downcallHandle(handle, FunctionDescriptor.of(
                JAVA_INT,
                ADDRESS,  // io_uring* ring
                ADDRESS,  // long* result_ids
                JAVA_INT, // int in_flight_requests
                JAVA_INT, // int read_count
                ADDRESS,  // long* read_batch_ids
                ADDRESS,  // int* read_fds
                ADDRESS,  // void** read_buffers
                ADDRESS,  // unsigned int** read_sizes
                ADDRESS   // long* read_offsets
        ));
        handle = libraryLookup.findOrThrow("uring_poll");

        uringJustPoll = nativeLinker.downcallHandle(handle, FunctionDescriptor.of(
                JAVA_INT,
                ADDRESS, // io_uring* ring
                ADDRESS  // long* result_ids
        ));

        handle = libraryLookup.findOrThrow("initialize_uring");
        uringInit = nativeLinker.downcallHandle(handle, FunctionDescriptor.of(ADDRESS, JAVA_INT));

        handle = libraryLookup.findOrThrow("close_uring");
        uringClose = nativeLinker.downcallHandle(handle, FunctionDescriptor.ofVoid(ADDRESS));
    }

    static {
        Path libFile;
        IoUring ioUringI = null;
        // copy resource to temp file so it can be loaded
        try (var is = IoUring.class.getClassLoader().getResourceAsStream("liburing.so")) {
            var tempFile = File.createTempFile("liburing", ".so");
            tempFile.deleteOnExit();

            try (var os = new FileOutputStream(tempFile)) {
                is.transferTo(os);
                os.flush();
            }

            System.load(tempFile.getAbsolutePath());
        }
        catch (Exception e) {
            logger.info("Failed to load native library, likely not built", e);
        }

        try (var is = IoUring.class.getClassLoader().getResourceAsStream("libcpp.so")) {
            var tempFile = File.createTempFile("libcpp", ".so");
            tempFile.deleteOnExit();

            try (var os = new FileOutputStream(tempFile)) {
                is.transferTo(os);
                os.flush();
            }

            libFile = tempFile.toPath();
            ioUringI = new IoUring(libFile);
        }
        catch (Exception e) {
            logger.info("Failed to load native library, likely not built", e);
        }

        instance = ioUringI;
        isAvailable = instance != null;
    }

    public static IoUring instance() {
        return instance;
    }

    public static UringQueue uringOpen(int fd, int queueSize) {
        try {
            return new UringQueue((MemorySegment) instance.uringInit.invoke(queueSize), fd);
        }
        catch (Throwable t) {
            throw new RuntimeException("Failed to invoke native function", t);
        }
    }

    public static void uringClose(UringQueue ring) {
        try {
            instance.uringClose.invoke(ring.pointer());
        }
        catch (Throwable t) {
            throw new RuntimeException("Failed to invoke native function", t);
        }
    }

    public static int uringReadBatch(int fd, UringQueue ring, List<MemorySegment> dest, List<Long> offsets, boolean direct) {
        if (offsets.isEmpty()) {
            throw new IllegalArgumentException("Empty offset list in uringRead");
        }
        if (offsets.size() == 1) {
            if (LinuxSystemCalls.readAt(fd, dest.getFirst(), offsets.getFirst()) > 0)
                return 1;
            else return -1;
        }
        try {
            MemorySegment bufferList = Arena.ofAuto().allocate(8L * offsets.size(), 8);
            MemorySegment sizeList = Arena.ofAuto().allocate(4L * offsets.size(), 8);
            MemorySegment offsetList = Arena.ofAuto().allocate(8L * offsets.size(), 8);

            if (dest.size() != offsets.size()) {
                throw new IllegalStateException();
            }

            for (int i = 0; i < offsets.size(); i++) {
                var buffer = dest.get(i);
                bufferList.setAtIndex(JAVA_LONG, i, buffer.address());
                sizeList.setAtIndex(JAVA_INT, i, (int) buffer.byteSize());
                offsetList.setAtIndex(JAVA_LONG, i, offsets.get(i));
            }
            if (direct) {
                return (Integer) instance.uringReadDirect.invoke(fd, ring.pointer(), dest.size(), bufferList, sizeList, offsetList);
            }
            else {
                return (Integer) instance.uringReadBuffered.invoke(fd, ring.pointer(), dest.size(), bufferList, sizeList, offsetList);
            }
        }
        catch (Throwable t) {
            throw new RuntimeException("Failed to invoke native function", t);
        }
    }

}
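Per the class comment, isAvailable must be checked before any native call. A sketch of that guard (the file path is hypothetical), with a fallback branch for when the shared library did not load:

// Sketch: honor the isAvailable contract before touching the ring API
if (IoUring.isAvailable) {
    int fd = LinuxSystemCalls.openBuffered(Path.of("/tmp/example.dat")); // hypothetical
    UringQueue ring = IoUring.uringOpen(fd, 2048);
    try {
        MemorySegment buf = Arena.ofAuto().allocate(4096, 8);
        int ok = IoUring.uringReadBatch(fd, ring, List.of(buf), List.of(0L), false);
        // ok < 0 signals a failed batch
    } finally {
        ring.close();
        LinuxSystemCalls.closeFd(fd);
    }
}
// else: fall back to a pure-Java read path, e.g. FileChannel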
code/libraries/native/java/nu/marginalia/ffi/LinuxSystemCalls.java (new file, 155 lines)
@@ -0,0 +1,155 @@
package nu.marginalia.ffi;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileOutputStream;
import java.lang.foreign.*;
import java.lang.invoke.MethodHandle;
import java.nio.file.Path;

import static java.lang.foreign.ValueLayout.*;

/** This class provides access to wrappers around Linux system calls.
 * <p></p>
 * isAvailable is a boolean flag that indicates whether the native
 * implementations are available.  If the shared library cannot be loaded,
 * isAvailable will be false.  This flag must be checked before calling
 * any of the native functions.
 * */
@SuppressWarnings("preview")
public class LinuxSystemCalls {
    private final MethodHandle openDirect;
    private final MethodHandle openBuffered;
    private final MethodHandle closeFd;
    private final MethodHandle readAtFd;
    private final MethodHandle fadviseRandom;
    private final MethodHandle fadviseWillneed;
    private final MethodHandle madviseRandom;

    public static final LinuxSystemCalls instance;

    /** Indicates whether the native implementations are available */
    public static final boolean isAvailable;

    private static final Logger logger = LoggerFactory.getLogger(LinuxSystemCalls.class);

    private LinuxSystemCalls(Path libFile) {
        SymbolLookup libraryLookup = SymbolLookup.libraryLookup(libFile, Arena.global());
        var nativeLinker = Linker.nativeLinker();
        MemorySegment handle = libraryLookup.findOrThrow("open_direct_fd");
        openDirect = nativeLinker.downcallHandle(handle, FunctionDescriptor.of(JAVA_INT, ADDRESS));
        handle = libraryLookup.findOrThrow("open_buffered_fd");
        openBuffered = nativeLinker.downcallHandle(handle, FunctionDescriptor.of(JAVA_INT, ADDRESS));

        handle = libraryLookup.findOrThrow("fadvise_random");
        fadviseRandom = nativeLinker.downcallHandle(handle, FunctionDescriptor.ofVoid(JAVA_INT));

        handle = libraryLookup.findOrThrow("fadvise_willneed");
        fadviseWillneed = nativeLinker.downcallHandle(handle, FunctionDescriptor.ofVoid(JAVA_INT));

        handle = libraryLookup.findOrThrow("madvise_random");
        madviseRandom = nativeLinker.downcallHandle(handle, FunctionDescriptor.ofVoid(ADDRESS, JAVA_LONG));
        handle = libraryLookup.findOrThrow("close_fd");
        closeFd = nativeLinker.downcallHandle(handle, FunctionDescriptor.ofVoid(JAVA_INT));

        handle = libraryLookup.findOrThrow("read_at");
        readAtFd = nativeLinker.downcallHandle(handle, FunctionDescriptor.of(JAVA_INT, JAVA_INT, ADDRESS, JAVA_INT, JAVA_LONG));
    }

    static {
        Path libFile;
        LinuxSystemCalls nativeAlgosI = null;
        // copy resource to temp file so it can be loaded
        try (var is = NativeAlgos.class.getClassLoader().getResourceAsStream("liburing.so")) {
            var tempFile = File.createTempFile("liburing", ".so");
            tempFile.deleteOnExit();

            try (var os = new FileOutputStream(tempFile)) {
                is.transferTo(os);
                os.flush();
            }

            System.load(tempFile.getAbsolutePath());
        }
        catch (Exception e) {
            logger.info("Failed to load native library, likely not built", e);
        }

        try (var is = NativeAlgos.class.getClassLoader().getResourceAsStream("libcpp.so")) {
            var tempFile = File.createTempFile("libcpp", ".so");
            tempFile.deleteOnExit();

            try (var os = new FileOutputStream(tempFile)) {
                is.transferTo(os);
                os.flush();
            }

            libFile = tempFile.toPath();
            nativeAlgosI = new LinuxSystemCalls(libFile);
        }
        catch (Exception e) {
            logger.info("Failed to load native library, likely not built", e);
        }

        instance = nativeAlgosI;
        isAvailable = instance != null;
    }

    public static int openDirect(Path filename) {
        try {
            MemorySegment filenameCStr = Arena.global().allocateFrom(filename.toString());
            return (Integer) instance.openDirect.invoke(filenameCStr);
        } catch (Throwable t) {
            throw new RuntimeException("Failed to invoke native function", t);
        }
    }

    public static int openBuffered(Path filename) {
        try {
            MemorySegment filenameCStr = Arena.global().allocateFrom(filename.toString());
            return (Integer) instance.openBuffered.invoke(filenameCStr);
        } catch (Throwable t) {
            throw new RuntimeException("Failed to invoke native function", t);
        }
    }

    public static int readAt(int fd, MemorySegment dest, long offset) {
        try {
            return (Integer) instance.readAtFd.invoke(fd, dest, (int) dest.byteSize(), offset);
        } catch (Throwable t) {
            throw new RuntimeException("Failed to invoke native function", t);
        }
    }

    public static void fadviseRandom(int fd) {
        try {
            instance.fadviseRandom.invoke(fd);
        } catch (Throwable t) {
            throw new RuntimeException("Failed to invoke native function", t);
        }
    }

    public static void fadviseWillneed(int fd) {
        try {
            instance.fadviseWillneed.invoke(fd);
        } catch (Throwable t) {
            throw new RuntimeException("Failed to invoke native function", t);
        }
    }
    public static void madviseRandom(MemorySegment segment) {
        try {
            instance.madviseRandom.invoke(segment, segment.byteSize());
        } catch (Throwable t) {
            throw new RuntimeException("Failed to invoke native function", t);
        }
    }
    public static void closeFd(int fd) {
        try {
            instance.closeFd.invoke(fd);
        } catch (Throwable t) {
            throw new RuntimeException("Failed to invoke native function", t);
        }
    }
}
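A sketch of the pread(2) wrapper in isolation, again guarded on isAvailable (the file path is hypothetical, and the snippet assumes an enclosing method that throws IOException): open, read the first 4 KiB into a confined-arena buffer, then release the fd.

// Sketch: one positioned read through the native wrapper
if (LinuxSystemCalls.isAvailable) {
    int fd = LinuxSystemCalls.openBuffered(Path.of("/tmp/example.dat")); // hypothetical
    try (Arena arena = Arena.ofConfined()) {
        MemorySegment buf = arena.allocate(4096);
        int n = LinuxSystemCalls.readAt(fd, buf, 0L); // bytes read, or negative on error
        if (n < 0) throw new IOException("read_at failed");
    } finally {
        LinuxSystemCalls.closeFd(fd);
    }
}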
code/libraries/native/java/nu/marginalia/ffi/NativeAlgos.java (modified)
@@ -1,4 +1,4 @@
-package nu.marginalia;
+package nu.marginalia.ffi;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -36,15 +36,14 @@ public class NativeAlgos {
 
     private static final Logger logger = LoggerFactory.getLogger(NativeAlgos.class);
 
     private NativeAlgos(Path libFile) {
-        var libraryLookup = SymbolLookup.libraryLookup(libFile, Arena.global());
+        SymbolLookup libraryLookup = SymbolLookup.libraryLookup(libFile, Arena.global());
         var nativeLinker = Linker.nativeLinker();
 
-        var handle = libraryLookup.find("ms_sort_64").get();
+        MemorySegment handle = libraryLookup.findOrThrow("ms_sort_64");
         qsortHandle = nativeLinker.downcallHandle(handle, FunctionDescriptor.ofVoid(ADDRESS, JAVA_LONG, JAVA_LONG));
 
-        handle = libraryLookup.find("ms_sort_128").get();
+        handle = libraryLookup.findOrThrow("ms_sort_128");
         qsort128Handle = nativeLinker.downcallHandle(handle,
                 FunctionDescriptor.ofVoid(ADDRESS, JAVA_LONG, JAVA_LONG));
     }
@@ -53,11 +52,25 @@
         Path libFile;
         NativeAlgos nativeAlgosI = null;
         // copy resource to temp file so it can be loaded
+        try (var is = NativeAlgos.class.getClassLoader().getResourceAsStream("liburing.so")) {
+            var tempFile = File.createTempFile("liburing", ".so");
+            tempFile.deleteOnExit();
+
+            try (var os = new FileOutputStream(tempFile)) {
+                is.transferTo(os);
+                os.flush();
+            }
+
+            System.load(tempFile.getAbsolutePath());
+        }
+        catch (Exception e) {
+            logger.info("Failed to load native library, likely not built", e);
+        }
+
         try (var is = NativeAlgos.class.getClassLoader().getResourceAsStream("libcpp.so")) {
             var tempFile = File.createTempFile("libcpp", ".so");
             tempFile.deleteOnExit();
 
             try (var os = new FileOutputStream(tempFile)) {
                 is.transferTo(os);
                 os.flush();
@@ -67,15 +80,13 @@
             nativeAlgosI = new NativeAlgos(libFile);
         }
         catch (Exception e) {
-            e.printStackTrace();
-            logger.info("Failed to load native library, likely not built");
+            logger.info("Failed to load native library, likely not built", e);
         }
 
         instance = nativeAlgosI;
         isAvailable = instance != null;
     }
 
 
     public static void sort(MemorySegment ms, long start, long end) {
         try {
             instance.qsortHandle.invoke(ms, start, end);
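The README change further down notes that Java substitutes should be used when the native helpers are absent. A sketch of that pattern around the native sort (not from the commit; the start/end units follow whatever convention ms_sort_64 expects, which this diff does not spell out):

// Sketch: prefer the native sort when available, otherwise sort on the Java side
MemorySegment seg = Arena.ofConfined().allocate(8 * 1024, 8);
if (NativeAlgos.isAvailable) {
    NativeAlgos.sort(seg, 0, 1024);
} else {
    long[] copy = seg.toArray(ValueLayout.JAVA_LONG); // pure-Java fallback
    Arrays.sort(copy);
    MemorySegment.copy(MemorySegment.ofArray(copy), 0, seg, 0, 8L * copy.length);
}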
code/libraries/native/java/nu/marginalia/uring/UringFileReader.java (new file, 161 lines)
@@ -0,0 +1,161 @@
package nu.marginalia.uring;

import it.unimi.dsi.fastutil.longs.Long2IntAVLTreeMap;
import it.unimi.dsi.fastutil.longs.LongAVLTreeSet;
import it.unimi.dsi.fastutil.longs.LongIterator;
import nu.marginalia.ffi.LinuxSystemCalls;

import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

public class UringFileReader implements AutoCloseable {
    private final UringQueue[] rings = new UringQueue[8];
    private final AtomicLong ringIdx = new AtomicLong();
    private final int fd;
    private final boolean direct;

    private static final int QUEUE_SIZE = 2048;

    public UringFileReader(Path filename, boolean direct) throws IOException {
        if (direct) {
            fd = LinuxSystemCalls.openDirect(filename);
            this.direct = true;
        }
        else {
            fd = LinuxSystemCalls.openBuffered(filename);
            LinuxSystemCalls.fadviseRandom(fd);
            this.direct = false;
        }
        for (int i = 0; i < rings.length; i++) {
            rings[i] = UringQueue.open(fd, QUEUE_SIZE);
        }
        if (fd < 0) {
            throw new IOException("Error opening direct file: " + filename);
        }
    }

    public void fadviseWillneed() {
        LinuxSystemCalls.fadviseWillneed(fd);
    }

    public void read(List<MemorySegment> destinations, List<Long> offsets) {
        if (destinations.size() < 5) {
            for (int i = 0; i < destinations.size(); i++) {
                var ms = destinations.get(i);
                long offset = offsets.get(i);

                int ret;
                if (ms.byteSize() != (ret = LinuxSystemCalls.readAt(fd, ms, offset))) {
                    throw new RuntimeException("Read failed, rv=" + ret + " at " + offset + " : " + ms.byteSize());
                }
            }
            return;
        }
        var ring = rings[(int) (ringIdx.getAndIncrement() % rings.length)];

        if (destinations.size() <= QUEUE_SIZE) {
            int ret = ring.readBatch(destinations, offsets, direct);
            if (ret != offsets.size()) {
                throw new RuntimeException("Read failed, rv=" + ret);
            }
        }
        else {
            // We *could* break the task into multiple submissions, but this leads to some
            // very unpredictable read latencies
            throw new IllegalArgumentException("Submission size exceeds queue size!");
        }
    }

    /** This function takes a list of offsets and sizes, and translates them to a minimum of blockSize'd O_DIRECT
     * reads.  A single buffer will be allocated to hold all the data, to encourage HugePages allocation and
     * reduce TLB thrashing.  It is still generally helpful for performance if the data is at least best-effort
     * block aligned.
     *
     * @return MemorySegment slices that contain only the requested data.
     */
    public List<MemorySegment> readUnalignedInDirectMode(Arena arena, long[] offsets, int[] sizes, int blockSize) {

        if (offsets.length < 1)
            return List.of();
        if (offsets.length != sizes.length) throw new IllegalArgumentException("Offsets and Sizes arrays don't match!");
        if ((blockSize & 511) != 0) throw new IllegalArgumentException("Block size must be a multiple of 512");

        // First we work out which blocks we need to read, and how many they are
        final LongAVLTreeSet neededBlocks = new LongAVLTreeSet();

        for (int i = 0; i < offsets.length; i++) {
            for (long block = offsets[i] & -blockSize;
                 block <= ((offsets[i] + sizes[i]) & -blockSize);
                 block += blockSize)
            {
                neededBlocks.add(block);
            }
        }

        MemorySegment allMemory = arena.allocate((long) blockSize * neededBlocks.size(), blockSize);

        List<MemorySegment> buffers = new ArrayList<>(sizes.length);
        List<Long> bufferOffsets = new ArrayList<>(sizes.length);

        final Long2IntAVLTreeMap blockToIdx = new Long2IntAVLTreeMap();
        LongIterator neededBlockIterator = neededBlocks.longIterator();

        long runStart = -1;
        long runCurrent = -1;
        long sliceOffset = 0;

        for (;;) {
            long nextBlock = neededBlockIterator.nextLong();

            blockToIdx.put(nextBlock, blockToIdx.size());

            if (runStart < 0) runStart = nextBlock;
            else if (runCurrent + blockSize != nextBlock) {
                int bufferSize = (int) (blockSize + runCurrent - runStart);
                bufferOffsets.add(runStart);
                buffers.add(allMemory.asSlice(sliceOffset, bufferSize));
                sliceOffset += bufferSize;

                runStart = nextBlock;
            }

            runCurrent = nextBlock;

            if (!neededBlockIterator.hasNext()) {
                // If this is the last value, we need to wrap up the final run
                int bufferSize = (int) (blockSize + runCurrent - runStart);
                bufferOffsets.add(runStart);
                buffers.add(allMemory.asSlice(sliceOffset, bufferSize));
                break;
            }
        }

        // Perform the read
        read(buffers, bufferOffsets);

        // Slice the big memory chunk into the requested slices
        List<MemorySegment> ret = new ArrayList<>(sizes.length);
        for (int i = 0; i < offsets.length; i++) {
            long offset = offsets[i];
            int size = sizes[i];

            long startBlock = (long) blockSize * blockToIdx.get(offset & -blockSize);
            long blockOffset = offset & (blockSize - 1);
            ret.add(allMemory.asSlice(startBlock + blockOffset, size));
        }

        return ret;
    }

    public void close() {
        for (var ring : rings) {
            ring.close();
        }
        LinuxSystemCalls.closeFd(fd);
    }
}
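The block bookkeeping in readUnalignedInDirectMode rests on power-of-two alignment arithmetic. A worked example with the numbers from the test further down (offset 5000*8 = 40000, size 100*8 = 800, blockSize 4096):

// Alignment arithmetic as used above; blockSize must be a power of two (and a multiple of 512)
int blockSize = 4096;
long offset = 40000, size = 800;
long firstBlock = offset & -blockSize;          // 36864: block holding the first byte
long lastBlock = (offset + size) & -blockSize;  // 36864: same block, so one 4096-byte read suffices
long inBlock = offset & (blockSize - 1);        // 3136: where the payload starts within its block
// the returned slice is allMemory.asSlice(blockIdx * blockSize + inBlock, size)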
code/libraries/native/java/nu/marginalia/uring/UringQueue.java (new file, 80 lines)
@@ -0,0 +1,80 @@
package nu.marginalia.uring;

import nu.marginalia.ffi.IoUring;

import java.lang.foreign.MemorySegment;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

public final class UringQueue {
    private final MemorySegment pointer;
    private final int fd;
    private final Lock lock = new ReentrantLock(true);

    public UringQueue(MemorySegment pointer, int fd) {
        this.pointer = pointer;
        this.fd = fd;
    }

    public static UringQueue open(int fd, int size) {
        return IoUring.uringOpen(fd, size);
    }

    public int readBatch(List<MemorySegment> dest, List<Long> offsets, boolean direct) {
        try {
            if (!lock.tryLock(10, TimeUnit.MILLISECONDS))
                throw new RuntimeException("io_uring slow, likely backpressure!");

            try {
                return IoUring.uringReadBatch(fd, this, dest, offsets, direct);
            }
            finally {
                lock.unlock();
            }
        }
        catch (RuntimeException ex) {
            throw ex;
        }
        catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }

    public void close() {
        IoUring.uringClose(this);
    }

    public MemorySegment pointer() {
        return pointer;
    }

    public int fd() {
        return fd;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this) return true;
        if (obj == null || obj.getClass() != this.getClass()) return false;
        var that = (UringQueue) obj;
        return Objects.equals(this.pointer, that.pointer) &&
                this.fd == that.fd;
    }

    @Override
    public int hashCode() {
        return Objects.hash(pointer, fd);
    }

    @Override
    public String toString() {
        return "UringQueue[" +
                "pointer=" + pointer + ", " +
                "fd=" + fd + ']';
    }

}
code/libraries/native/readme.md (modified)
@@ -1,8 +1,14 @@
-# LongArray C++ Helpers
+# Native C++ Helpers
 
-This package contains helper functions for working with LongArray objects,
-as native C++ calls.  The helpers are only built on Linux, and if they are absent,
-Java substitutes should be used instead.
+This package contains helper functions for calling native functions.
+
+### Systems Programming Helpers
+
+TBW
+
+### Long Array Helpers
+
+The helpers are only built on Linux, and if they are absent, Java substitutes should be used instead.
 
 Library loading and access is available through the
 [NativeAlgos](java/nu/marginalia/NativeAlgos.java) class.
code/libraries/native/src/sort.cc (modified)
@@ -1,7 +1,7 @@
-#include "cpphelpers.hpp"
 #include <algorithm>
 #include <stdio.h>
+#include <cstdint>
 
 extern "C" {
 /* Pair of 64-bit integers. */
 /* The struct is packed to ensure that the struct is exactly 16 bytes in size, as we need to pointer
    alias on an array of 8 byte longs. Since structs guarantee that the first element is at offset 0,
@@ -28,4 +28,7 @@ void ms_sort_128(int64_t* area, uint64_t start, uint64_t end) {
         [](const p64x2& fst, const p64x2& snd) {
             return fst.a < snd.a;
         });
 }
+
+
+}
code/libraries/native/src/unix.cc (new file, 35 lines)
@@ -0,0 +1,35 @@
#include <algorithm>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstring>
#include <sys/mman.h>

extern "C" {
    void fadvise_random(int fd) {
        posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
    }
    void fadvise_willneed(int fd) {
        posix_fadvise(fd, 0, 0, POSIX_FADV_WILLNEED);
    }
    void madvise_random(void* address, unsigned long size) {
        madvise(address, size, MADV_RANDOM);
    }


    int open_buffered_fd(char* filename) {
        return open(filename, O_RDONLY);
    }

    int open_direct_fd(char* filename) {
        return open(filename, O_DIRECT | O_RDONLY);
    }

    int read_at(int fd, void* buf, unsigned int count, long offset) {
        return pread(fd, buf, count, offset);
    }
    void close_fd(int fd) {
        close(fd);
    }

}
code/libraries/native/src/uring.cc (new file, 234 lines)
@@ -0,0 +1,234 @@
#include <algorithm>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <liburing.h>
#include <string.h>

extern "C" {
    io_uring* initialize_uring(int queue_size) {
        io_uring* ring = (io_uring*) malloc(sizeof(io_uring));
        if (!ring) return NULL;

        int ret = io_uring_queue_init(queue_size, ring, 0);
        if (ret < 0) {
            fprintf(stderr, "io_uring_queue_init failed: %s\n", strerror(-ret));
            if (-ret == ENOMEM) {
                fprintf(stderr, "If you are seeing this error, you probably need to increase `ulimit -l` or memlock in /etc/security/limits.conf");
            }
            free(ring);
            return NULL;
        }

        fprintf(stderr, "Initialized ring @ %p (sq=%u, cq=%u)\n",
                ring, ring->sq.ring_entries, ring->cq.ring_entries);
        return ring;
    }

    void close_uring(io_uring* ring) {
        fprintf(stderr, "Closed ring @ %p\n", ring);
        io_uring_queue_exit(ring);
        free(ring);
    }


    int uring_read_submit_and_poll(
        io_uring* ring,
        long* result_ids,
        int in_flight_requests,
        int read_count,
        long* read_batch_ids,
        int* read_fds,
        void** read_buffers,
        unsigned int* read_sizes,
        long* read_offsets)
    {

        for (int i = 0; i < read_count; i++) {
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
            if (!sqe) {
                fprintf(stderr, "uring_queue full!");
                return -1;
            }

            io_uring_prep_read(sqe, read_fds[i], read_buffers[i], read_sizes[i], read_offsets[i]);
            io_uring_sqe_set_data(sqe, (void*) read_batch_ids[i]);
        }

        int wait_cnt = 8;

        if (wait_cnt > in_flight_requests) {
            wait_cnt = in_flight_requests;
        }

        int submitted = io_uring_submit_and_wait(ring, wait_cnt);
        if (submitted != read_count) {
            if (submitted < 0) {
                fprintf(stderr, "io_uring_submit %s\n", strerror(-submitted));
            }
            else {
                fprintf(stderr, "io_uring_submit(): submitted != %d, was %d", read_count, submitted);
            }
            return -1;
        }

        int completed = 0;
        struct io_uring_cqe *cqe;
        while (io_uring_peek_cqe(ring, &cqe) == 0) {
            if (cqe->res < 0) {
                fprintf(stderr, "io_uring error: %s\n", strerror(-cqe->res));
                result_ids[completed++] = -cqe->user_data; // flag an error by sending a negative ID back so we can clean up memory allocation etc
            }
            else {
                result_ids[completed++] = cqe->user_data;
            }
            io_uring_cqe_seen(ring, cqe);
        }

        return completed;
    }

    int uring_poll(io_uring* ring, long* result_ids)
    {
        int completed = 0;
        struct io_uring_cqe *cqe;
        while (io_uring_peek_cqe(ring, &cqe) == 0) {
            if (cqe->res < 0) {
                fprintf(stderr, "io_uring error: %s\n", strerror(-cqe->res));
                result_ids[completed++] = -cqe->user_data; // flag an error by sending a negative ID back so we can clean up memory allocation etc
            }
            else {
                result_ids[completed++] = cqe->user_data;
            }
            io_uring_cqe_seen(ring, cqe);
        }

        return completed;
    }

    int uring_read_buffered(int fd, io_uring* ring, int n, void** buffers, unsigned int* sizes, long* offsets) {

#ifdef DEBUG_CHECKS

        struct stat st;
        fstat(fd, &st);
        for (int i = 0; i < n; i++) {
            if (offsets[i] + sizes[i] > st.st_size) {
                fprintf(stderr, "Read beyond EOF: offset %ld >= size %ld\n",
                        offsets[i], st.st_size);
                return -1;
            }
        }
#endif

        unsigned ready = io_uring_cq_ready(ring);
        if (ready > 0) {
            fprintf(stderr, "Skipping %u leftover completions\n", ready);
            io_uring_cq_advance(ring, ready);
        }
        for (int i = 0; i < n; i++) {
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
            if (!sqe) {
                fprintf(stderr, "uring_queue full!");
                return -1;
            }

            io_uring_prep_read(sqe, fd, buffers[i], sizes[i], offsets[i]);
            io_uring_sqe_set_data(sqe, (void*)(long)i);
        }

        int submitted = io_uring_submit_and_wait(ring, n);
        if (submitted != n) {
            fprintf(stderr, "io_uring_submit(): submitted != %d, was %d", n, submitted);
            return -1;
        }
        int completed = 0;
        int bad = 0;
        for (int i = 0; i < n; i++) {
            struct io_uring_cqe *cqe;
            int ret = io_uring_wait_cqe(ring, &cqe);
            if (ret < 0) {
                fprintf(stderr, "io_uring_wait_cqe failed: %s\n", strerror(-ret));
                return -1;
            }

            if (cqe->res < 0) {
                fprintf(stderr, "io_uring error: %s\n", strerror(-cqe->res));
            }
            io_uring_cqe_seen(ring, cqe);
        }

        return n;
    }


    int uring_read_direct(int fd, io_uring* ring, int n, void** buffers, unsigned int* sizes, long* offsets) {
#ifdef DEBUG_CHECKS
        if (!ring) {
            fprintf(stderr, "NULL ring!\n");
            return -1;
        }
        if (!buffers || !sizes || !offsets) {
            fprintf(stderr, "NULL arrays: buffers=%p sizes=%p offsets=%p\n",
                    buffers, sizes, offsets);
            return -1;
        }
        for (int i = 0; i < n; i++) {
            if (((uintptr_t)buffers[i] & 511) != 0) {
                fprintf(stderr, "Buffer %d not aligned to 512 bytes, is %p\n", i, buffers[i]);
                return -1;
            }
        }

        struct stat st;
        fstat(fd, &st);
        for (int i = 0; i < n; i++) {
            if (offsets[i] + sizes[i] >= st.st_size) {
                fprintf(stderr, "Read beyond EOF: offset %ld >= size %ld\n",
                        offsets[i], st.st_size);
                return -1;
            }
        }
#endif

        unsigned ready = io_uring_cq_ready(ring);
        if (ready > 0) {
            fprintf(stderr, "Skipping %u leftover completions\n", ready);
            io_uring_cq_advance(ring, ready);
        }

        for (int i = 0; i < n; i++) {
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
            if (!sqe) {
                fprintf(stderr, "uring_queue full!");
                return -1;
            }

            io_uring_prep_read(sqe, fd, buffers[i], sizes[i], offsets[i]);
            io_uring_sqe_set_data(sqe, (void*)(long)i); // Store buffer index
        }

        int submitted = io_uring_submit_and_wait(ring, n);
        if (submitted != n) {
            fprintf(stderr, "io_uring_submit(): submitted != %d, was %d", n, submitted);
            return -1;
        }
        int completed = 0;
        int bad = 0;
        for (int i = 0; i < n; i++) {
            struct io_uring_cqe *cqe;
            int ret = io_uring_wait_cqe(ring, &cqe);
            if (ret < 0) {
                fprintf(stderr, "io_uring_wait_cqe failed: %s\n", strerror(-ret));
                return -1;
            }

            if (cqe->res < 0) {
                fprintf(stderr, "io_uring error: %s\n", strerror(-cqe->res));
            }
            io_uring_cqe_seen(ring, cqe);
        }

        return n;
    }
}
code/libraries/native/test/nu/marginalia/UringFileReaderTest.java (new file, 81 lines)
@@ -0,0 +1,81 @@
package nu.marginalia;

import nu.marginalia.uring.UringFileReader;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.List;

public class UringFileReaderTest {
    Path testFile;
    @BeforeEach
    public void setUp() throws IOException {
        testFile = Files.createTempFile("UringFileReaderTest", ".dat");
    }
    @AfterEach
    public void tearDown() throws IOException {
        Files.deleteIfExists(testFile);
    }

    void createTestFileWithLongs(int size) {
        ByteBuffer buffer = ByteBuffer.allocateDirect(size * 8);
        for (int i = 0; i < size; i++) {
            buffer.putLong(i);
        }
        buffer.flip();
        try (var fc = Files.newByteChannel(testFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) {
            while (buffer.hasRemaining())
                fc.write(buffer);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Test
    void testUringFileReader() throws IOException {

        createTestFileWithLongs(1024);

        try (var dfr = new UringFileReader(testFile, false)) {
            MemorySegment buf1 = Arena.ofAuto().allocate(32, 8);
            MemorySegment buf2 = Arena.ofAuto().allocate(16, 8);

            dfr.read(List.of(buf1, buf2), List.of(0L, 8L));

            for (int i = 0; i < buf1.byteSize(); i+=8) {
                System.out.println(buf1.get(ValueLayout.JAVA_LONG, i));
            }

            for (int i = 0; i < buf2.byteSize(); i+=8) {
                System.out.println(buf2.get(ValueLayout.JAVA_LONG, i));
            }
        }

    }

    @Test
    void testUringFileReaderUnaligned() throws IOException {
        createTestFileWithLongs(65536);

        try (var dfr = new UringFileReader(testFile, true)) {
            var ret = dfr.readUnalignedInDirectMode(Arena.ofAuto(),
                    new long[] { 10*8, 20*8, 5000*8, 5100*8},
                    new int[] { 32*8, 10*8, 100*8, 100*8},
                    4096);
            System.out.println(ret.get(0).get(ValueLayout.JAVA_LONG, 0));
            System.out.println(ret.get(1).get(ValueLayout.JAVA_LONG, 0));
            System.out.println(ret.get(2).get(ValueLayout.JAVA_LONG, 0));
            System.out.println(ret.get(3).get(ValueLayout.JAVA_LONG, 0));
        }

    }
}
Some files were not shown because too many files have changed in this diff.