1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 17:32:39 +02:00

Compare commits

...

3 Commits

Author SHA1 Message Date
Viktor Lofgren
700364b86d (sample) Remove debug logging
The problem sat in the desk chair all along
2025-07-21 15:08:20 +02:00
Viktor Lofgren
7e725ddaed (sample) Remove debug logging
The problem sat in the desk chair all along
2025-07-21 14:41:59 +02:00
Viktor Lofgren
120209e138 (sample) Diagnosing compression errors
2025-07-21 14:34:08 +02:00
2 changed files with 4 additions and 4 deletions

View File

@@ -17,7 +17,6 @@ import org.xml.sax.SAXException;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
-import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URISyntaxException;
@@ -87,7 +86,7 @@ public class DomSampleClassifier {
 EdgeDomain sampleDomain = new EdgeDomain(sample.getDomainName());
-try (var compressedStream = new ZstdInputStream(new ByteArrayInputStream(sample.getHtmlSampleZstd().toByteArray()))) {
+try (var compressedStream = new ZstdInputStream(sample.getHtmlSampleZstd().newInput())) {
 String html = new String(compressedStream.readAllBytes(), StandardCharsets.UTF_8);
 var parsedDoc = Jsoup.parse(html);
 var fixedElements = parsedDoc.select("*[data-position=fixed]");
@@ -108,7 +107,7 @@ public class DomSampleClassifier {
 }
 }
 catch (Exception ex) {
-logger.warn("Error when parsing DOM HTML sample for size" + sample.getHtmlSampleZstd().size(), ex);
+logger.warn("Error when parsing DOM HTML sample", ex);
 }
 // Classify outgoing requests

View File

@@ -12,3 +12,4 @@
 2025-05-28: Deploy assistant and browserless.
 2025-06-06: Deploy assistant and browserless.
 2025-07-21: Deploy executor partition 1.
+2025-07-21: Deploy search.