1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

(proxy) Fix late binding of proxy configuration

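The code was selecting the proxy too early (once, when the HTTP client was constructed), so it ended up being hardcoded for the entire crawl run, thus breaking the proxy selection logic. The proxy is now selected lazily, each time a socket configuration is resolved for a route.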

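There was also a problem where the proxy-carrying socket configuration set on the connection manager builder was overwritten by a second default socket configuration applied after the connection manager was built, thus disabling the proxy injection.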
This commit is contained in:
Viktor Lofgren
2025-09-30 11:48:43 +02:00
parent 164a646af6
commit b688f15550
6 changed files with 67 additions and 89 deletions

View File

@@ -1,43 +0,0 @@
package nu.marginalia.proxy;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder;
import org.apache.hc.core5.http.io.SocketConfig;
import org.apache.hc.core5.util.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetSocketAddress;
/**
 * Static helper for wiring SOCKS proxy settings into an Apache HttpClient 5
 * pooling connection manager builder.
 */
public class SocksProxyHttpClientFactory {
    private static final Logger logger = LoggerFactory.getLogger(SocksProxyHttpClientFactory.class);

    /**
     * Installs a default socket configuration that routes connections through the
     * given SOCKS proxy and uses a 30 second socket timeout.
     * <p>
     * When {@code proxy} is {@code null} the builder is left untouched, so whatever
     * socket configuration it already has (or its defaults) stays in effect.
     *
     * @param builder connection manager builder to configure
     * @param proxy   SOCKS proxy to route through, or {@code null} for no proxy
     */
    public static void configureConnectionManager(PoolingHttpClientConnectionManagerBuilder builder,
                                                  SocksProxyConfiguration.SocksProxy proxy) {
        // Guard clause: nothing to configure when no proxy was supplied.
        if (proxy == null) {
            logger.debug("Configuring HTTP client without proxy");
            return;
        }

        logger.debug("Configuring HTTP client with SOCKS proxy: {}", proxy);

        InetSocketAddress proxyEndpoint = new InetSocketAddress(proxy.getHost(), proxy.getPort());

        // The proxy address travels with the socket configuration, which this call
        // installs as the builder's default.
        builder.setDefaultSocketConfig(
                SocketConfig.custom()
                        .setSocksProxyAddress(proxyEndpoint)
                        .setSoTimeout(Timeout.ofSeconds(30))
                        .build());

        logger.info("SOCKS proxy configured: {}:{}", proxy.getHost(), proxy.getPort());
    }
}

View File

@@ -3,9 +3,10 @@ package nu.marginalia.proxy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Manages SOCKS proxy selection and rotation for crawler requests.
@@ -29,19 +30,19 @@ public class SocksProxyManager {
logger.info("SOCKS proxy support disabled");
}
}
/**
* Selects the next proxy to use based on the configured strategy.
* Never returns null: throws IllegalStateException if proxy support is disabled or no proxies are configured, so callers should check isProxyEnabled() first.
*/
@Nonnull
public SocksProxyConfiguration.SocksProxy selectProxy() {
if (!config.isEnabled()) {
return null;
throw new IllegalStateException("Proxies not configured");
}
List<SocksProxyConfiguration.SocksProxy> proxies = config.getProxies();
if (proxies.isEmpty()) {
return null;
throw new IllegalStateException("Proxies not configured");
}
SocksProxyConfiguration.SocksProxy selectedProxy;
@@ -59,7 +60,6 @@ public class SocksProxyManager {
break;
}
logger.debug("Selected SOCKS proxy: {}", selectedProxy);
return selectedProxy;
}
@@ -74,6 +74,6 @@ public class SocksProxyManager {
* Checks if proxy support is enabled and proxies are available.
*/
public boolean isProxyEnabled() {
return config.isEnabled();
return config.isEnabled() && !config.getProxies().isEmpty();
}
}

View File

@@ -16,7 +16,6 @@ import nu.marginalia.model.body.HttpFetchResult;
import nu.marginalia.model.crawldata.CrawlerDomainStatus;
import nu.marginalia.proxy.SocksProxyConfiguration;
import nu.marginalia.proxy.SocksProxyManager;
import nu.marginalia.proxy.SocksProxyHttpClientFactory;
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
import org.apache.hc.client5.http.HttpRequestRetryStrategy;
import org.apache.hc.client5.http.classic.HttpClient;
@@ -52,6 +51,7 @@ import org.slf4j.MarkerFactory;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLException;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.SocketTimeoutException;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
@@ -105,18 +105,23 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
.setDefaultConnectionConfig(connectionConfig)
.setTlsSocketStrategy(new DefaultClientTlsStrategy(SSLContext.getDefault()));
// Configure SOCKS proxy if enabled
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
SocksProxyHttpClientFactory.configureConnectionManager(connectionManagerBuilder, selectedProxy);
connectionManagerBuilder.setSocketConfigResolver(route -> {
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
// Configure SOCKS proxy if enabled
if (proxyManager.isProxyEnabled()) {
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort());
socketConfigBuilder.setSocksProxyAddress(socksProxyAddress);
}
socketConfigBuilder
.setSoTimeout(Timeout.ofSeconds(10))
.setSoLinger(TimeValue.ofSeconds(-1));
return socketConfigBuilder.build();
});
connectionManager = connectionManagerBuilder.build();
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
.setSoLinger(TimeValue.ofSeconds(-1))
.setSoTimeout(Timeout.ofSeconds(10))
.build()
);
Thread.ofPlatform().daemon(true).start(() -> {
try {
for (;;) {

View File

@@ -3,7 +3,6 @@ package nu.marginalia.livecrawler.io;
import com.google.inject.Provider;
import nu.marginalia.proxy.SocksProxyConfiguration;
import nu.marginalia.proxy.SocksProxyManager;
import nu.marginalia.proxy.SocksProxyHttpClientFactory;
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
import org.apache.hc.client5.http.classic.HttpClient;
import org.apache.hc.client5.http.config.ConnectionConfig;
@@ -24,6 +23,7 @@ import org.apache.hc.core5.util.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetSocketAddress;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.Iterator;
@@ -58,17 +58,23 @@ public class HttpClientProvider implements Provider<HttpClient> {
.setDefaultConnectionConfig(connectionConfig);
// Configure SOCKS proxy if enabled
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
SocksProxyHttpClientFactory.configureConnectionManager(connectionManagerBuilder, selectedProxy);
connectionManagerBuilder.setSocketConfigResolver(route -> {
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
// Configure SOCKS proxy if enabled
if (proxyManager.isProxyEnabled()) {
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort());
socketConfigBuilder.setSocksProxyAddress(socksProxyAddress);
}
socketConfigBuilder
.setSoTimeout(Timeout.ofSeconds(30))
.setSoLinger(TimeValue.ofSeconds(-1));
return socketConfigBuilder.build();
});
connectionManager = connectionManagerBuilder.build();
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
.setSoLinger(TimeValue.ofSeconds(-1))
.setSoTimeout(Timeout.ofSeconds(10))
.build()
);
Thread.ofPlatform().daemon(true).start(() -> {
try {
for (;;) {

View File

@@ -3,7 +3,6 @@ package nu.marginalia.ndp.io;
import com.google.inject.Provider;
import nu.marginalia.proxy.SocksProxyConfiguration;
import nu.marginalia.proxy.SocksProxyManager;
import nu.marginalia.proxy.SocksProxyHttpClientFactory;
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
import org.apache.hc.client5.http.classic.HttpClient;
import org.apache.hc.client5.http.config.ConnectionConfig;
@@ -24,6 +23,7 @@ import org.apache.hc.core5.util.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetSocketAddress;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.Iterator;
@@ -57,18 +57,23 @@ public class HttpClientProvider implements Provider<HttpClient> {
.setMaxConnTotal(50)
.setDefaultConnectionConfig(connectionConfig);
// Configure SOCKS proxy if enabled
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
SocksProxyHttpClientFactory.configureConnectionManager(connectionManagerBuilder, selectedProxy);
connectionManagerBuilder.setSocketConfigResolver(route -> {
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
// Configure SOCKS proxy if enabled
if (proxyManager.isProxyEnabled()) {
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort());
socketConfigBuilder.setSocksProxyAddress(socksProxyAddress);
}
socketConfigBuilder
.setSoTimeout(Timeout.ofSeconds(10))
.setSoLinger(TimeValue.ofSeconds(-1));
return socketConfigBuilder.build();
});
connectionManager = connectionManagerBuilder.build();
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
.setSoLinger(TimeValue.ofSeconds(-1))
.setSoTimeout(Timeout.ofSeconds(10))
.build()
);
Thread.ofPlatform().daemon(true).start(() -> {
try {
for (;;) {

View File

@@ -3,7 +3,6 @@ package nu.marginalia.ping.io;
import com.google.inject.Provider;
import nu.marginalia.proxy.SocksProxyConfiguration;
import nu.marginalia.proxy.SocksProxyManager;
import nu.marginalia.proxy.SocksProxyHttpClientFactory;
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
import org.apache.hc.client5.http.classic.HttpClient;
import org.apache.hc.client5.http.config.ConnectionConfig;
@@ -30,6 +29,7 @@ import org.slf4j.LoggerFactory;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.net.InetSocketAddress;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.X509Certificate;
@@ -97,18 +97,23 @@ public class HttpClientProvider implements Provider<HttpClient> {
.setTlsSocketStrategy(
new DefaultClientTlsStrategy(sslContext, NoopHostnameVerifier.INSTANCE));
// Configure SOCKS proxy if enabled
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
SocksProxyHttpClientFactory.configureConnectionManager(connectionManagerBuilder, selectedProxy);
connectionManagerBuilder.setSocketConfigResolver(route -> {
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
// Configure SOCKS proxy if enabled
if (proxyManager.isProxyEnabled()) {
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort());
socketConfigBuilder.setSocksProxyAddress(socksProxyAddress);
}
socketConfigBuilder
.setSoTimeout(Timeout.ofSeconds(10))
.setSoLinger(TimeValue.ofSeconds(-1));
return socketConfigBuilder.build();
});
connectionManager = connectionManagerBuilder.build();
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
.setSoLinger(TimeValue.ofSeconds(-1))
.setSoTimeout(Timeout.ofSeconds(10))
.build()
);
Thread.ofPlatform().daemon(true).start(() -> {
try {
for (;;) {