mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
2 Commits
deploy-028
...
deploy-028
Author | SHA1 | Date | |
---|---|---|---|
|
a23ec521fe | ||
|
fff3babc6d |
@@ -15,6 +15,7 @@
|
||||
<classifier target="url-regex" rule="tracking">/ccm/collect$</classifier>
|
||||
<classifier target="url-regex" rule="tracking">^/[0-9]+\.js$</classifier>
|
||||
<classifier target="url-regex" rule="tracking">^/[a-z0-9]\.gif$</classifier>
|
||||
+ <classifier target="url-regex" rule="tracking">^/pixel\.gif$</classifier>
|
||||
<classifier target="url-regex" rule="ads">/pagead/</classifier>
|
||||
<classifier target="url-regex" rule="ads">/google-ads/</classifier>
|
||||
|
||||
|
@@ -161,7 +161,6 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
|
||||
|
||||
final Set<HtmlFeature> features = featureExtractor.getFeatures(url, doc, documentHeaders, dld);
|
||||
|
||||
|
||||
if (!documentLengthLogic.validateLength(dld, specialization.lengthModifier() * documentClass.lengthLimitModifier())) {
|
||||
features.add(HtmlFeature.SHORT_DOCUMENT);
|
||||
}
|
||||
|
@@ -115,7 +115,9 @@ public class PdfDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
|
||||
|
||||
ret.quality = -5;
|
||||
|
||||
- ret.features = Set.of(HtmlFeature.PDF);
|
||||
+ ret.features = new HashSet<>(); // must be mutable!
|
||||
+ ret.features.add(HtmlFeature.PDF);
|
||||
|
||||
ret.description = getDescription(doc);
|
||||
ret.hashCode = dld.localitySensitiveHashCode();
|
||||
|
||||
|
Reference in New Issue
Block a user