1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

Compare commits

...

1 Commits

Author SHA1 Message Date
Viktor Lofgren
8a944cf4c6 (crawler) Add request time to crawl data
This is an interesting indicator of website quality.
2025-05-19 14:07:41 +02:00

View File

@@ -328,6 +328,7 @@ public record SlopCrawlDataRecord(String domain,
// Column writers held by the Writer; each is assigned in the Writer(Path) constructor.
private final LongColumn.Writer timestampColumnWriter;
private final EnumColumn.Writer contentTypeColumnWriter;
private final ByteArrayColumn.Writer bodyColumnWriter;
// Receives the record's requestTimeMs, which is cast to a Java short where it is written.
private final ShortColumn.Writer requestTimeColumnWriter;
private final StringColumn.Writer headerColumnWriter;
public Writer(Path path) throws IOException {
@@ -341,6 +342,7 @@ public record SlopCrawlDataRecord(String domain,
timestampColumnWriter = timestampColumn.create(this);
contentTypeColumnWriter = contentTypeColumn.create(this);
bodyColumnWriter = bodyColumn.create(this);
requestTimeColumnWriter = requestTimeColumn.create(this);
headerColumnWriter = headerColumn.create(this);
}
@@ -353,6 +355,7 @@ public record SlopCrawlDataRecord(String domain,
// Hand each of these record fields to its corresponding column writer.
timestampColumnWriter.put(record.timestamp);
contentTypeColumnWriter.put(record.contentType);
bodyColumnWriter.put(record.body);
// NOTE(review): the narrowing (short) cast keeps only the low 16 bits. If
// requestTimeMs is a wider type (presumably int -- confirm against the record
// declaration), any value above Short.MAX_VALUE (32767 ms) is stored as a
// wrapped, possibly negative number. Consider clamping before the cast.
requestTimeColumnWriter.put((short) record.requestTimeMs);
headerColumnWriter.put(record.headers);
}