From b688f1555092817b4b9b06e2579c953d771efaa9 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 30 Sep 2025 11:48:43 +0200 Subject: [PATCH] (proxy) Fix late binding of proxy configuration The code was selecting the proxy too early (once, when the HTTP client was constructed), so that it ended up being hardcoded for the entire crawl run, thus breaking the proxy selection logic. There was also a problem where the socket configuration was overwritten by another socket configuration, thus disabling the proxy injection. --- .../proxy/SocksProxyHttpClientFactory.java | 43 ------------------- .../marginalia/proxy/SocksProxyManager.java | 14 +++--- .../crawl/fetcher/HttpFetcherImpl.java | 25 ++++++----- .../livecrawler/io/HttpClientProvider.java | 24 +++++++---- .../marginalia/ndp/io/HttpClientProvider.java | 25 ++++++----- .../ping/io/HttpClientProvider.java | 25 ++++++----- 6 files changed, 67 insertions(+), 89 deletions(-) delete mode 100644 code/common/config/java/nu/marginalia/proxy/SocksProxyHttpClientFactory.java diff --git a/code/common/config/java/nu/marginalia/proxy/SocksProxyHttpClientFactory.java b/code/common/config/java/nu/marginalia/proxy/SocksProxyHttpClientFactory.java deleted file mode 100644 index f9d631025..000000000 --- a/code/common/config/java/nu/marginalia/proxy/SocksProxyHttpClientFactory.java +++ /dev/null @@ -1,43 +0,0 @@ -package nu.marginalia.proxy; - -import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder; -import org.apache.hc.core5.http.io.SocketConfig; -import org.apache.hc.core5.util.Timeout; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.net.InetSocketAddress; - -/** - * Utility class for configuring HTTP clients with SOCKS proxy support. - */ -public class SocksProxyHttpClientFactory { - private static final Logger logger = LoggerFactory.getLogger(SocksProxyHttpClientFactory.class); - - /** - * Configures a connection manager builder with SOCKS proxy support. 
- * If no proxy is provided, uses default socket configuration. - */ - public static void configureConnectionManager(PoolingHttpClientConnectionManagerBuilder builder, - SocksProxyConfiguration.SocksProxy proxy) { - if (proxy != null) { - logger.debug("Configuring HTTP client with SOCKS proxy: {}", proxy); - - // Create SOCKS proxy address - InetSocketAddress socksProxyAddress = new InetSocketAddress(proxy.getHost(), proxy.getPort()); - - // Configure socket config with SOCKS proxy - SocketConfig socketConfig = SocketConfig.custom() - .setSocksProxyAddress(socksProxyAddress) - .setSoTimeout(Timeout.ofSeconds(30)) - .build(); - - // Apply the socket configuration to the connection manager - builder.setDefaultSocketConfig(socketConfig); - - logger.info("SOCKS proxy configured: {}:{}", proxy.getHost(), proxy.getPort()); - } else { - logger.debug("Configuring HTTP client without proxy"); - } - } -} diff --git a/code/common/config/java/nu/marginalia/proxy/SocksProxyManager.java b/code/common/config/java/nu/marginalia/proxy/SocksProxyManager.java index c2af569ce..8d172e4e2 100644 --- a/code/common/config/java/nu/marginalia/proxy/SocksProxyManager.java +++ b/code/common/config/java/nu/marginalia/proxy/SocksProxyManager.java @@ -3,9 +3,10 @@ package nu.marginalia.proxy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nonnull; import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicInteger; /** * Manages SOCKS proxy selection and rotation for crawler requests. @@ -29,19 +30,19 @@ public class SocksProxyManager { logger.info("SOCKS proxy support disabled"); } } - + /** * Selects the next proxy to use based on the configured strategy. - * Returns null if proxy support is disabled or no proxies are available. 
*/ + @Nonnull public SocksProxyConfiguration.SocksProxy selectProxy() { if (!config.isEnabled()) { - return null; + throw new IllegalStateException("Proxies not configured"); } List proxies = config.getProxies(); if (proxies.isEmpty()) { - return null; + throw new IllegalStateException("Proxies not configured"); } SocksProxyConfiguration.SocksProxy selectedProxy; @@ -59,7 +60,6 @@ public class SocksProxyManager { break; } - logger.debug("Selected SOCKS proxy: {}", selectedProxy); return selectedProxy; } @@ -74,6 +74,6 @@ public class SocksProxyManager { * Checks if proxy support is enabled and proxies are available. */ public boolean isProxyEnabled() { - return config.isEnabled(); + return config.isEnabled() && !config.getProxies().isEmpty(); } } diff --git a/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java b/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java index 041e45948..3b0187c06 100644 --- a/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java +++ b/code/processes/crawling-process/java/nu/marginalia/crawl/fetcher/HttpFetcherImpl.java @@ -16,7 +16,6 @@ import nu.marginalia.model.body.HttpFetchResult; import nu.marginalia.model.crawldata.CrawlerDomainStatus; import nu.marginalia.proxy.SocksProxyConfiguration; import nu.marginalia.proxy.SocksProxyManager; -import nu.marginalia.proxy.SocksProxyHttpClientFactory; import org.apache.hc.client5.http.ConnectionKeepAliveStrategy; import org.apache.hc.client5.http.HttpRequestRetryStrategy; import org.apache.hc.client5.http.classic.HttpClient; @@ -52,6 +51,7 @@ import org.slf4j.MarkerFactory; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLException; import java.io.IOException; +import java.net.InetSocketAddress; import java.net.SocketTimeoutException; import java.net.URISyntaxException; import java.net.UnknownHostException; @@ -105,18 +105,23 @@ public class HttpFetcherImpl implements HttpFetcher, 
HttpRequestRetryStrategy { .setDefaultConnectionConfig(connectionConfig) .setTlsSocketStrategy(new DefaultClientTlsStrategy(SSLContext.getDefault())); - // Configure SOCKS proxy if enabled - SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy(); - SocksProxyHttpClientFactory.configureConnectionManager(connectionManagerBuilder, selectedProxy); + connectionManagerBuilder.setSocketConfigResolver(route -> { + SocketConfig.Builder socketConfigBuilder = SocketConfig.custom(); + // Configure SOCKS proxy if enabled + if (proxyManager.isProxyEnabled()) { + SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy(); + InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort()); + socketConfigBuilder.setSocksProxyAddress(socksProxyAddress); + } + socketConfigBuilder + .setSoTimeout(Timeout.ofSeconds(10)) + .setSoLinger(TimeValue.ofSeconds(-1)); + + return socketConfigBuilder.build(); + }); connectionManager = connectionManagerBuilder.build(); - connectionManager.setDefaultSocketConfig(SocketConfig.custom() - .setSoLinger(TimeValue.ofSeconds(-1)) - .setSoTimeout(Timeout.ofSeconds(10)) - .build() - ); - Thread.ofPlatform().daemon(true).start(() -> { try { for (;;) { diff --git a/code/processes/live-crawling-process/java/nu/marginalia/livecrawler/io/HttpClientProvider.java b/code/processes/live-crawling-process/java/nu/marginalia/livecrawler/io/HttpClientProvider.java index ce397aba1..c3106aa1b 100644 --- a/code/processes/live-crawling-process/java/nu/marginalia/livecrawler/io/HttpClientProvider.java +++ b/code/processes/live-crawling-process/java/nu/marginalia/livecrawler/io/HttpClientProvider.java @@ -3,7 +3,6 @@ package nu.marginalia.livecrawler.io; import com.google.inject.Provider; import nu.marginalia.proxy.SocksProxyConfiguration; import nu.marginalia.proxy.SocksProxyManager; -import nu.marginalia.proxy.SocksProxyHttpClientFactory; import 
org.apache.hc.client5.http.ConnectionKeepAliveStrategy; import org.apache.hc.client5.http.classic.HttpClient; import org.apache.hc.client5.http.config.ConnectionConfig; @@ -24,6 +23,7 @@ import org.apache.hc.core5.util.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.net.InetSocketAddress; import java.security.KeyManagementException; import java.security.NoSuchAlgorithmException; import java.util.Iterator; @@ -58,17 +58,23 @@ public class HttpClientProvider implements Provider { .setDefaultConnectionConfig(connectionConfig); // Configure SOCKS proxy if enabled - SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy(); - SocksProxyHttpClientFactory.configureConnectionManager(connectionManagerBuilder, selectedProxy); + connectionManagerBuilder.setSocketConfigResolver(route -> { + SocketConfig.Builder socketConfigBuilder = SocketConfig.custom(); + // Configure SOCKS proxy if enabled + if (proxyManager.isProxyEnabled()) { + SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy(); + InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort()); + socketConfigBuilder.setSocksProxyAddress(socksProxyAddress); + } + socketConfigBuilder + .setSoTimeout(Timeout.ofSeconds(30)) + .setSoLinger(TimeValue.ofSeconds(-1)); + + return socketConfigBuilder.build(); + }); connectionManager = connectionManagerBuilder.build(); - connectionManager.setDefaultSocketConfig(SocketConfig.custom() - .setSoLinger(TimeValue.ofSeconds(-1)) - .setSoTimeout(Timeout.ofSeconds(10)) - .build() - ); - Thread.ofPlatform().daemon(true).start(() -> { try { for (;;) { diff --git a/code/processes/new-domain-process/java/nu/marginalia/ndp/io/HttpClientProvider.java b/code/processes/new-domain-process/java/nu/marginalia/ndp/io/HttpClientProvider.java index 399912172..777038996 100644 --- a/code/processes/new-domain-process/java/nu/marginalia/ndp/io/HttpClientProvider.java +++ 
b/code/processes/new-domain-process/java/nu/marginalia/ndp/io/HttpClientProvider.java @@ -3,7 +3,6 @@ package nu.marginalia.ndp.io; import com.google.inject.Provider; import nu.marginalia.proxy.SocksProxyConfiguration; import nu.marginalia.proxy.SocksProxyManager; -import nu.marginalia.proxy.SocksProxyHttpClientFactory; import org.apache.hc.client5.http.ConnectionKeepAliveStrategy; import org.apache.hc.client5.http.classic.HttpClient; import org.apache.hc.client5.http.config.ConnectionConfig; @@ -24,6 +23,7 @@ import org.apache.hc.core5.util.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.net.InetSocketAddress; import java.security.KeyManagementException; import java.security.NoSuchAlgorithmException; import java.util.Iterator; @@ -57,18 +57,23 @@ public class HttpClientProvider implements Provider { .setMaxConnTotal(50) .setDefaultConnectionConfig(connectionConfig); - // Configure SOCKS proxy if enabled - SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy(); - SocksProxyHttpClientFactory.configureConnectionManager(connectionManagerBuilder, selectedProxy); + connectionManagerBuilder.setSocketConfigResolver(route -> { + SocketConfig.Builder socketConfigBuilder = SocketConfig.custom(); + // Configure SOCKS proxy if enabled + if (proxyManager.isProxyEnabled()) { + SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy(); + InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort()); + socketConfigBuilder.setSocksProxyAddress(socksProxyAddress); + } + socketConfigBuilder + .setSoTimeout(Timeout.ofSeconds(10)) + .setSoLinger(TimeValue.ofSeconds(-1)); + + return socketConfigBuilder.build(); + }); connectionManager = connectionManagerBuilder.build(); - connectionManager.setDefaultSocketConfig(SocketConfig.custom() - .setSoLinger(TimeValue.ofSeconds(-1)) - .setSoTimeout(Timeout.ofSeconds(10)) - .build() - ); - 
Thread.ofPlatform().daemon(true).start(() -> { try { for (;;) { diff --git a/code/processes/ping-process/java/nu/marginalia/ping/io/HttpClientProvider.java b/code/processes/ping-process/java/nu/marginalia/ping/io/HttpClientProvider.java index d83b8c36b..7ef70b88c 100644 --- a/code/processes/ping-process/java/nu/marginalia/ping/io/HttpClientProvider.java +++ b/code/processes/ping-process/java/nu/marginalia/ping/io/HttpClientProvider.java @@ -3,7 +3,6 @@ package nu.marginalia.ping.io; import com.google.inject.Provider; import nu.marginalia.proxy.SocksProxyConfiguration; import nu.marginalia.proxy.SocksProxyManager; -import nu.marginalia.proxy.SocksProxyHttpClientFactory; import org.apache.hc.client5.http.ConnectionKeepAliveStrategy; import org.apache.hc.client5.http.classic.HttpClient; import org.apache.hc.client5.http.config.ConnectionConfig; @@ -30,6 +29,7 @@ import org.slf4j.LoggerFactory; import javax.net.ssl.SSLContext; import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; +import java.net.InetSocketAddress; import java.security.KeyManagementException; import java.security.NoSuchAlgorithmException; import java.security.cert.X509Certificate; @@ -97,18 +97,23 @@ public class HttpClientProvider implements Provider { .setTlsSocketStrategy( new DefaultClientTlsStrategy(sslContext, NoopHostnameVerifier.INSTANCE)); - // Configure SOCKS proxy if enabled - SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy(); - SocksProxyHttpClientFactory.configureConnectionManager(connectionManagerBuilder, selectedProxy); + connectionManagerBuilder.setSocketConfigResolver(route -> { + SocketConfig.Builder socketConfigBuilder = SocketConfig.custom(); + // Configure SOCKS proxy if enabled + if (proxyManager.isProxyEnabled()) { + SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy(); + InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort()); + 
socketConfigBuilder.setSocksProxyAddress(socksProxyAddress); + } + socketConfigBuilder + .setSoTimeout(Timeout.ofSeconds(10)) + .setSoLinger(TimeValue.ofSeconds(-1)); + + return socketConfigBuilder.build(); + }); connectionManager = connectionManagerBuilder.build(); - connectionManager.setDefaultSocketConfig(SocketConfig.custom() - .setSoLinger(TimeValue.ofSeconds(-1)) - .setSoTimeout(Timeout.ofSeconds(10)) - .build() - ); - Thread.ofPlatform().daemon(true).start(() -> { try { for (;;) {