mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
3 Commits
deploy-029
...
deploy-029
Author | SHA1 | Date | |
---|---|---|---|
|
700364b86d | ||
|
7e725ddaed | ||
|
120209e138 |
@@ -17,7 +17,6 @@ import org.xml.sax.SAXException;
|
|||||||
import javax.xml.parsers.DocumentBuilder;
|
import javax.xml.parsers.DocumentBuilder;
|
||||||
import javax.xml.parsers.DocumentBuilderFactory;
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
@@ -87,7 +86,7 @@ public class DomSampleClassifier {
|
|||||||
|
|
||||||
EdgeDomain sampleDomain = new EdgeDomain(sample.getDomainName());
|
EdgeDomain sampleDomain = new EdgeDomain(sample.getDomainName());
|
||||||
|
|
||||||
try (var compressedStream = new ZstdInputStream(new ByteArrayInputStream(sample.getHtmlSampleZstd().toByteArray()))) {
|
try (var compressedStream = new ZstdInputStream(sample.getHtmlSampleZstd().newInput())) {
|
||||||
String html = new String(compressedStream.readAllBytes(), StandardCharsets.UTF_8);
|
String html = new String(compressedStream.readAllBytes(), StandardCharsets.UTF_8);
|
||||||
var parsedDoc = Jsoup.parse(html);
|
var parsedDoc = Jsoup.parse(html);
|
||||||
var fixedElements = parsedDoc.select("*[data-position=fixed]");
|
var fixedElements = parsedDoc.select("*[data-position=fixed]");
|
||||||
@@ -108,7 +107,7 @@ public class DomSampleClassifier {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (Exception ex) {
|
catch (Exception ex) {
|
||||||
logger.warn("Error when parsing DOM HTML sample for size" + sample.getHtmlSampleZstd().size(), ex);
|
logger.warn("Error when parsing DOM HTML sample", ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Classify outgoing requests
|
// Classify outgoing requests
|
||||||
|
@@ -11,4 +11,5 @@
|
|||||||
2025-05-17: Redeploy all.
|
2025-05-17: Redeploy all.
|
||||||
2025-05-28: Deploy assistant and browserless.
|
2025-05-28: Deploy assistant and browserless.
|
||||||
2025-06-06: Deploy assistant and browserless.
|
2025-06-06: Deploy assistant and browserless.
|
||||||
2025-07-21: Deploy executor partition 1.
|
2025-07-21: Deploy executor partition 1.
|
||||||
|
2025-07-21: Deploy search.
|
Reference in New Issue
Block a user