mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
Compare commits
2 Commits
deploy-007
...
deploy-007
Author | SHA1 | Date | |
---|---|---|---|
|
db138b2a6f | ||
|
1673fc284c |
@@ -59,9 +59,14 @@ public final class CrawledDocument implements SerializableCrawlData {
|
||||
}
|
||||
|
||||
public Document parseBody() throws IOException {
|
||||
+ // Prevent stalls from parsing excessively large documents
|
||||
|
||||
+ byte[] bytes = documentBodyBytes.length > 200_000
|
||||
+ ? Arrays.copyOf(documentBodyBytes, 200_000) : documentBodyBytes;
|
||||
|
||||
return DocumentBodyToString.getParsedData(
|
||||
ContentType.parse(contentType),
|
||||
- documentBodyBytes,
|
||||
+ bytes,
|
||||
url);
|
||||
}
|
||||
|
||||
|
@@ -228,7 +228,7 @@ public class LiveCrawlDataSet implements AutoCloseable {
|
||||
}
|
||||
|
||||
@Override
|
||||
- public boolean hasNext() throws IOException {
|
||||
+ public boolean hasNext() {
|
||||
if (dataStack == null) {
|
||||
query();
|
||||
}
|
||||
@@ -236,7 +236,7 @@ public class LiveCrawlDataSet implements AutoCloseable {
|
||||
}
|
||||
|
||||
@Override
|
||||
- public void close() throws Exception {
|
||||
+ public void close() {
|
||||
dataStack.clear();
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user