1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 17:32:39 +02:00

Compare commits

...

2 Commits

2 changed files with 8 additions and 3 deletions

View File

@@ -59,9 +59,14 @@ public final class CrawledDocument implements SerializableCrawlData {
 }
 public Document parseBody() throws IOException {
+// Prevent stalls from parsing excessively large documents
+byte[] bytes = documentBodyBytes.length > 200_000
+? Arrays.copyOf(documentBodyBytes, 200_000) : documentBodyBytes;
 return DocumentBodyToString.getParsedData(
 ContentType.parse(contentType),
-documentBodyBytes,
+bytes,
 url);
 }

View File

@@ -228,7 +228,7 @@ public class LiveCrawlDataSet implements AutoCloseable {
 }
 @Override
-public boolean hasNext() throws IOException {
+public boolean hasNext() {
 if (dataStack == null) {
 query();
 }
@@ -236,7 +236,7 @@ public class LiveCrawlDataSet implements AutoCloseable {
 }
 @Override
-public void close() throws Exception {
+public void close() {
 dataStack.clear();
 }
 }