diff --git a/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTest.java
index c9defdefe..3bb26f6f5 100644
--- a/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTest.java
+++ b/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTest.java
@@ -4,21 +4,24 @@ package nu.marginalia.converting;
 import com.google.inject.Guice;
 import com.google.inject.Injector;
 import nu.marginalia.converting.model.ProcessedDocument;
+import nu.marginalia.converting.model.ProcessedDomain;
 import nu.marginalia.converting.processor.DomainProcessor;
 import nu.marginalia.io.SerializableCrawlDataStream;
 import nu.marginalia.model.DocumentFormat;
 import nu.marginalia.model.EdgeDomain;
 import nu.marginalia.model.crawl.DomainIndexingState;
+import nu.marginalia.model.crawl.HtmlFeature;
 import nu.marginalia.model.crawl.PubDate;
 import nu.marginalia.model.crawl.UrlIndexingState;
 import nu.marginalia.model.crawldata.CrawledDocument;
 import nu.marginalia.model.crawldata.CrawledDomain;
 import nu.marginalia.model.crawldata.SerializableCrawlData;
-import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.api.Test;
 
 import java.io.IOException;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.LocalTime;
 import java.util.*;
@@ -28,10 +31,10 @@ import static org.junit.jupiter.api.Assertions.*;
 @Tag("slow")
 public class ConvertingIntegrationTest {
 
-    private DomainProcessor domainProcessor;
+    private static DomainProcessor domainProcessor;
 
-    @BeforeEach
-    public void setUp() {
+    @BeforeAll
+    public static void setUp() {
         Injector injector = Guice.createInjector(
                 new ConvertingIntegrationTestModule()
         );
@@ -51,6 +54,35 @@ public class ConvertingIntegrationTest {
         assertEquals(ret.domain, new EdgeDomain("memex.marginalia.nu"));
         assertTrue(ret.documents.isEmpty());
     }
+
+    /**
+     * Manual diagnostic: runs full domain processing over a locally stored crawl data file and
+     * prints, per document, the URL, the feature set, and the encoded features masked with the
+     * AFFILIATE_LINK feature bit. Makes no assertions; skipped when the file is not present.
+     */
+    @Test
+    public void testBuggyCase() throws IOException {
+
+        // Test used to inspect processing of crawl data, change path below to use
+        Path problemCase = Path.of("/home/vlofgren/TestEnv/index-1/storage/crawl-data__25-09-15T16_33_57.245/46/64/4664ef43-blog.fermi.chat.slop.zip");
+
+        // Abort as "skipped" instead of returning, so an absent file is not reported as a pass
+        org.junit.jupiter.api.Assumptions.assumeTrue(Files.exists(problemCase),
+                "Crawl data file not found: " + problemCase);
+
+        // NOTE(review): the stream opened here is never closed; confirm whether fullProcessing closes it
+        ProcessedDomain result = domainProcessor.fullProcessing(SerializableCrawlDataStream.openDataStream(problemCase));
+        for (ProcessedDocument doc : result.documents) {
+            System.out.println(doc.url);
+            if (doc.details == null) {
+                continue;
+            }
+
+            System.out.println(doc.details.features);
+            System.out.println(HtmlFeature.encode(doc.details.features) & HtmlFeature.AFFILIATE_LINK.getFeatureBit());
+        }
+    }
+
     @Test
     public void testMemexMarginaliaNuDateInternalConsistency() throws IOException {
         var ret = domainProcessor.fullProcessing(asSerializableCrawlData(readMarginaliaWorkingSet()));
diff --git a/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTestModule.java b/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTestModule.java
index d14750ef7..ed1a51f0f 100644
--- a/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTestModule.java
+++ b/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTestModule.java
@@ -4,6 +4,7 @@ import com.google.inject.AbstractModule;
 import com.google.inject.name.Names;
 import nu.marginalia.LanguageModels;
 import nu.marginalia.WmsaHome;
+import nu.marginalia.api.domsample.DomSampleClient;
 import nu.marginalia.converting.processor.ConverterDomainTypes;
 import nu.marginalia.process.ProcessConfiguration;
 import nu.marginalia.service.module.ServiceConfiguration;
@@ -23,5 +24,8 @@ public class ConvertingIntegrationTestModule extends AbstractModule {
         ));
         bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
         bind(ConverterDomainTypes.class).toInstance(Mockito.mock(ConverterDomainTypes.class));
+
+        DomSampleClient domSampleClientMock = Mockito.mock(DomSampleClient.class);
+        bind(DomSampleClient.class).toInstance(domSampleClientMock);
     }
 }