1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

1 Commit

Author SHA1 Message Date
Viktor Lofgren
4bb71b8439 (crawler) Correct content type probing to only run on URLs that are suspected to be binary 2024-12-26 14:26:23 +01:00

View File

@@ -42,24 +42,24 @@ class ContentTypeProberTest {
port = r.nextInt(10000) + 8000;
server = HttpServer.create(new InetSocketAddress("127.0.0.1", port), 10);
-server.createContext("/html", exchange -> {
+server.createContext("/html.gz", exchange -> {
exchange.getResponseHeaders().add("Content-Type", "text/html");
exchange.sendResponseHeaders(200, -1);
exchange.close();
});
-server.createContext("/redir", exchange -> {
-exchange.getResponseHeaders().add("Location", "/html");
+server.createContext("/redir.gz", exchange -> {
+exchange.getResponseHeaders().add("Location", "/html.gz");
exchange.sendResponseHeaders(301, -1);
exchange.close();
});
-server.createContext("/bin", exchange -> {
+server.createContext("/bin.gz", exchange -> {
exchange.getResponseHeaders().add("Content-Type", "application/binary");
exchange.sendResponseHeaders(200, -1);
exchange.close();
});
-server.createContext("/timeout", exchange -> {
+server.createContext("/timeout.gz", exchange -> {
try {
Thread.sleep(15_000);
} catch (InterruptedException e) {
@@ -73,10 +73,10 @@ class ContentTypeProberTest {
server.start();
-htmlEndpoint = EdgeUrl.parse("http://localhost:" + port + "/html").get();
-binaryEndpoint = EdgeUrl.parse("http://localhost:" + port + "/bin").get();
-timeoutEndpoint = EdgeUrl.parse("http://localhost:" + port + "/timeout").get();
-htmlRedirEndpoint = EdgeUrl.parse("http://localhost:" + port + "/redir").get();
+htmlEndpoint = EdgeUrl.parse("http://localhost:" + port + "/html.gz").get();
+binaryEndpoint = EdgeUrl.parse("http://localhost:" + port + "/bin.gz").get();
+timeoutEndpoint = EdgeUrl.parse("http://localhost:" + port + "/timeout.gz").get();
+htmlRedirEndpoint = EdgeUrl.parse("http://localhost:" + port + "/redir.gz").get();
fetcher = new HttpFetcherImpl("test");
recorder = new WarcRecorder(warcFile);