1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

3 Commits

Author SHA1 Message Date
Viktor Lofgren
a23ec521fe (converter) Ensure features is mutable on DetailsWithWords as this is assumed later 2025-07-21 12:50:04 +02:00
Viktor Lofgren
fff3babc6d (classier) Add rule for */pixel.gif as likely tracking pixels 2025-07-21 12:35:57 +02:00
Viktor Lofgren
b2bfb8217c (special) Trigger CD run 2025-07-21 12:28:24 +02:00
4 changed files with 6 additions and 3 deletions

View File

@@ -15,6 +15,7 @@
<classifier target="url-regex" rule="tracking">/ccm/collect$</classifier>
<classifier target="url-regex" rule="tracking">^/[0-9]+\.js$</classifier>
<classifier target="url-regex" rule="tracking">^/[a-z0-9]\.gif$</classifier>
<classifier target="url-regex" rule="tracking">^/pixel\.gif$</classifier>
<classifier target="url-regex" rule="ads">/pagead/</classifier>
<classifier target="url-regex" rule="ads">/google-ads/</classifier>

View File

@@ -161,7 +161,6 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
final Set<HtmlFeature> features = featureExtractor.getFeatures(url, doc, documentHeaders, dld);
if (!documentLengthLogic.validateLength(dld, specialization.lengthModifier() * documentClass.lengthLimitModifier())) {
features.add(HtmlFeature.SHORT_DOCUMENT);
}

View File

@@ -115,7 +115,9 @@ public class PdfDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
ret.quality = -5;
ret.features = Set.of(HtmlFeature.PDF);
ret.features = new HashSet<>(); // must be mutable!
ret.features.add(HtmlFeature.PDF);
ret.description = getDescription(doc);
ret.hashCode = dld.localitySensitiveHashCode();

View File

@@ -10,4 +10,5 @@
2025-05-08: Deploy assistant.
2025-05-17: Redeploy all.
2025-05-28: Deploy assistant and browserless.
2025-06-06: Deploy assistant and browserless.
2025-06-06: Deploy assistant and browserless.
2025-07-21: Deploy executor partition 1.