mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
2 Commits
deploy-028
...
deploy-028
Author | SHA1 | Date | |
---|---|---|---|
|
a23ec521fe | ||
|
fff3babc6d |
@@ -15,6 +15,7 @@
|
|||||||
<classifier target="url-regex" rule="tracking">/ccm/collect$</classifier>
|
<classifier target="url-regex" rule="tracking">/ccm/collect$</classifier>
|
||||||
<classifier target="url-regex" rule="tracking">^/[0-9]+\.js$</classifier>
|
<classifier target="url-regex" rule="tracking">^/[0-9]+\.js$</classifier>
|
||||||
<classifier target="url-regex" rule="tracking">^/[a-z0-9]\.gif$</classifier>
|
<classifier target="url-regex" rule="tracking">^/[a-z0-9]\.gif$</classifier>
|
||||||
|
<classifier target="url-regex" rule="tracking">^/pixel\.gif$</classifier>
|
||||||
<classifier target="url-regex" rule="ads">/pagead/</classifier>
|
<classifier target="url-regex" rule="ads">/pagead/</classifier>
|
||||||
<classifier target="url-regex" rule="ads">/google-ads/</classifier>
|
<classifier target="url-regex" rule="ads">/google-ads/</classifier>
|
||||||
|
|
||||||
|
@@ -161,7 +161,6 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
|
|||||||
|
|
||||||
final Set<HtmlFeature> features = featureExtractor.getFeatures(url, doc, documentHeaders, dld);
|
final Set<HtmlFeature> features = featureExtractor.getFeatures(url, doc, documentHeaders, dld);
|
||||||
|
|
||||||
|
|
||||||
if (!documentLengthLogic.validateLength(dld, specialization.lengthModifier() * documentClass.lengthLimitModifier())) {
|
if (!documentLengthLogic.validateLength(dld, specialization.lengthModifier() * documentClass.lengthLimitModifier())) {
|
||||||
features.add(HtmlFeature.SHORT_DOCUMENT);
|
features.add(HtmlFeature.SHORT_DOCUMENT);
|
||||||
}
|
}
|
||||||
|
@@ -115,7 +115,9 @@ public class PdfDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
|
|||||||
|
|
||||||
ret.quality = -5;
|
ret.quality = -5;
|
||||||
|
|
||||||
ret.features = Set.of(HtmlFeature.PDF);
|
ret.features = new HashSet<>(); // must be mutable!
|
||||||
|
ret.features.add(HtmlFeature.PDF);
|
||||||
|
|
||||||
ret.description = getDescription(doc);
|
ret.description = getDescription(doc);
|
||||||
ret.hashCode = dld.localitySensitiveHashCode();
|
ret.hashCode = dld.localitySensitiveHashCode();
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user