1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

3 Commits

Author SHA1 Message Date
Viktor Lofgren
700364b86d (sample) Remove debug logging
The problem sat in the desk chair all along
2025-07-21 15:08:20 +02:00
Viktor Lofgren
7e725ddaed (sample) Remove debug logging
The problem sat in the desk chair all along
2025-07-21 14:41:59 +02:00
Viktor Lofgren
120209e138 (sample) Diagnosing compression errors
2025-07-21 14:34:08 +02:00
2 changed files with 4 additions and 4 deletions

View File

@@ -17,7 +17,6 @@ import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
-import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
@@ -87,7 +86,7 @@ public class DomSampleClassifier {
EdgeDomain sampleDomain = new EdgeDomain(sample.getDomainName());
-try (var compressedStream = new ZstdInputStream(new ByteArrayInputStream(sample.getHtmlSampleZstd().toByteArray()))) {
+try (var compressedStream = new ZstdInputStream(sample.getHtmlSampleZstd().newInput())) {
String html = new String(compressedStream.readAllBytes(), StandardCharsets.UTF_8);
var parsedDoc = Jsoup.parse(html);
var fixedElements = parsedDoc.select("*[data-position=fixed]");
@@ -108,7 +107,7 @@ public class DomSampleClassifier {
}
}
catch (Exception ex) {
-logger.warn("Error when parsing DOM HTML sample for size" + sample.getHtmlSampleZstd().size(), ex);
+logger.warn("Error when parsing DOM HTML sample", ex);
}
// Classify outgoing requests

View File

@@ -12,3 +12,4 @@
2025-05-28: Deploy assistant and browserless.
2025-06-06: Deploy assistant and browserless.
2025-07-21: Deploy executor partition 1.
2025-07-21: Deploy search.