mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
Compare commits
12 Commits
adc815e282
...
f6d5d7f196
Author | SHA1 | Date | |
---|---|---|---|
|
f6d5d7f196 | ||
|
abf1186fa7 | ||
|
94a77ebddf | ||
|
4e2f76a477 | ||
|
4cd1834938 | ||
|
5cbbea67ed | ||
|
b688f15550 | ||
|
f55af8ef48 | ||
|
164a646af6 | ||
|
b8000721bd | ||
|
2ee0b0e420 | ||
|
ec5f32b1d8 |
@@ -1,7 +1,7 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id("org.jetbrains.gradle.plugin.idea-ext") version "1.0"
|
||||
id "me.champeau.jmh" version "0.6.6"
|
||||
id "me.champeau.jmh" version "0.7.3"
|
||||
|
||||
// This is a workaround for a bug in the Jib plugin that causes it to stall randomly
|
||||
// https://github.com/GoogleContainerTools/jib/issues/3347
|
||||
@@ -47,8 +47,8 @@ subprojects.forEach {it ->
|
||||
}
|
||||
|
||||
ext {
|
||||
jvmVersion = 24
|
||||
dockerImageBase='container-registry.oracle.com/graalvm/jdk:24'
|
||||
jvmVersion = 25
|
||||
dockerImageBase='container-registry.oracle.com/graalvm/jdk:25'
|
||||
dockerImageTag='latest'
|
||||
dockerImageRegistry='marginalia'
|
||||
jibVersion = '3.4.5'
|
||||
|
@@ -19,6 +19,7 @@ dependencies {
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
implementation libs.bundles.mariadb
|
||||
implementation libs.bundles.httpcomponents
|
||||
implementation libs.mockito
|
||||
implementation libs.guava
|
||||
implementation dependencies.create(libs.guice.get()) {
|
||||
|
@@ -0,0 +1,141 @@
|
||||
package nu.marginalia.proxy;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * Configuration for SOCKS proxy settings used by crawlers to distribute IP footprint.
 *
 * <p>All values are read once, at construction time, from JVM system properties:
 * <ul>
 *   <li>{@code crawler.socksProxy.enabled} - parsed with {@link Boolean#parseBoolean(String)},
 *       default {@code false}</li>
 *   <li>{@code crawler.socksProxy.strategy} - name of a {@link ProxySelectionStrategy}
 *       (case-insensitive), default {@code ROUND_ROBIN}</li>
 *   <li>{@code crawler.socksProxy.list} - comma-separated proxy entries, default empty</li>
 * </ul>
 *
 * <p>Instances are immutable after construction.
 */
public class SocksProxyConfiguration {

    private static final String ENABLED_PROPERTY = "crawler.socksProxy.enabled";
    private static final String STRATEGY_PROPERTY = "crawler.socksProxy.strategy";
    private static final String LIST_PROPERTY = "crawler.socksProxy.list";

    /** Highest valid TCP port number. */
    private static final int MAX_PORT = 65535;

    private final boolean enabled;
    private final List<SocksProxy> proxies;
    private final ProxySelectionStrategy strategy;

    /**
     * Reads the proxy configuration from the current system properties.
     *
     * @throws IllegalArgumentException if {@code crawler.socksProxy.strategy} is set to a value
     *                                  that does not name a {@link ProxySelectionStrategy}
     */
    public SocksProxyConfiguration() {
        this.enabled = Boolean.parseBoolean(System.getProperty(ENABLED_PROPERTY, "false"));
        this.strategy = parseStrategy(System.getProperty(STRATEGY_PROPERTY, "ROUND_ROBIN"));
        this.proxies = parseProxies();
    }

    /**
     * Resolves the strategy name, ignoring case and surrounding whitespace so that
     * e.g. {@code random} and {@code RANDOM} are equivalent.
     */
    private static ProxySelectionStrategy parseStrategy(String value) {
        String normalized = value.trim().toUpperCase(java.util.Locale.ROOT);
        try {
            return ProxySelectionStrategy.valueOf(normalized);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException(
                    "Unknown value '" + value + "' for " + STRATEGY_PROPERTY
                            + ", expected one of " + Arrays.toString(ProxySelectionStrategy.values()),
                    e);
        }
    }

    /**
     * Parses the comma-separated proxy list. Blank and malformed entries are skipped.
     *
     * @return an unmodifiable list of the successfully parsed proxies, possibly empty
     */
    private static List<SocksProxy> parseProxies() {
        String proxyList = System.getProperty(LIST_PROPERTY, "");
        if (proxyList.isEmpty()) {
            return List.of();
        }

        return Arrays.stream(proxyList.split(","))
                .map(String::trim)
                .filter(entry -> !entry.isEmpty())
                .map(SocksProxyConfiguration::parseProxy)
                .filter(Objects::nonNull)
                // Unmodifiable, so callers of getProxies() cannot alter the configuration.
                .collect(Collectors.toUnmodifiableList());
    }

    /**
     * Parses a single entry of the form {@code host:port} or {@code host:port:username:password}.
     *
     * @return the parsed proxy, or {@code null} if the entry is malformed (missing port,
     *         empty host, non-numeric port, or port outside 1-65535)
     */
    private static SocksProxy parseProxy(String proxyString) {
        // Limit of 4 keeps everything after the third ':' together, so the password
        // (the last field) may itself contain ':' characters.
        String[] parts = proxyString.split(":", 4);
        if (parts.length < 2) {
            return null;
        }

        String host = parts[0].trim();
        if (host.isEmpty()) {
            return null;
        }

        int port;
        try {
            port = Integer.parseInt(parts[1].trim());
        } catch (NumberFormatException ignored) {
            // Malformed entries are skipped on a best-effort basis rather than failing startup.
            return null;
        }

        // Reject ports that could never be connected to, instead of failing later
        // when a socket address is built for every new connection.
        if (port < 1 || port > MAX_PORT) {
            return null;
        }

        if (parts.length == 4) {
            return new SocksProxy(host, port, parts[2], parts[3]);
        }
        // "host:port:username" without a password is treated as unauthenticated.
        return new SocksProxy(host, port);
    }

    /**
     * @return {@code true} only if proxying is switched on and at least one proxy was parsed
     */
    public boolean isEnabled() {
        return enabled && !proxies.isEmpty();
    }

    /**
     * @return the unmodifiable list of configured proxies, in the order they were listed
     */
    public List<SocksProxy> getProxies() {
        return proxies;
    }

    /**
     * @return the configured selection strategy
     */
    public ProxySelectionStrategy getStrategy() {
        return strategy;
    }

    /** How the next proxy is picked from the configured list. */
    public enum ProxySelectionStrategy {
        ROUND_ROBIN,
        RANDOM
    }

    /** Immutable description of one SOCKS proxy endpoint with optional credentials. */
    public static class SocksProxy {
        private final String host;
        private final int port;
        private final String username;
        private final String password;

        /** Creates a proxy without credentials. */
        public SocksProxy(String host, int port) {
            this(host, port, null, null);
        }

        /**
         * Creates a proxy with credentials.
         *
         * @param username user name, or {@code null} for none
         * @param password password, or {@code null} for none
         */
        public SocksProxy(String host, int port, String username, String password) {
            this.host = host;
            this.port = port;
            this.username = username;
            this.password = password;
        }

        public String getHost() {
            return host;
        }

        public int getPort() {
            return port;
        }

        public String getUsername() {
            return username;
        }

        public String getPassword() {
            return password;
        }

        /**
         * @return {@code true} if both a user name and a password are present
         */
        public boolean hasAuthentication() {
            return username != null && password != null;
        }

        /** Renders host, port and (if present) the user name; the password is never included. */
        @Override
        public String toString() {
            if (hasAuthentication()) {
                return String.format("%s:%d (auth: %s)", host, port, username);
            } else {
                return String.format("%s:%d", host, port);
            }
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            SocksProxy that = (SocksProxy) o;
            return port == that.port &&
                    Objects.equals(host, that.host) &&
                    Objects.equals(username, that.username) &&
                    Objects.equals(password, that.password);
        }

        @Override
        public int hashCode() {
            return Objects.hash(host, port, username, password);
        }
    }
}
|
@@ -0,0 +1,79 @@
|
||||
package nu.marginalia.proxy;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* Manages SOCKS proxy selection and rotation for crawler requests.
|
||||
*/
|
||||
public class SocksProxyManager {
|
||||
private static final Logger logger = LoggerFactory.getLogger(SocksProxyManager.class);
|
||||
|
||||
private final SocksProxyConfiguration config;
|
||||
private final AtomicInteger roundRobinIndex = new AtomicInteger(0);
|
||||
|
||||
public SocksProxyManager(SocksProxyConfiguration config) {
|
||||
this.config = config;
|
||||
|
||||
if (config.isEnabled()) {
|
||||
logger.info("SOCKS proxy support enabled with {} proxies using {} strategy",
|
||||
config.getProxies().size(), config.getStrategy());
|
||||
for (SocksProxyConfiguration.SocksProxy proxy : config.getProxies()) {
|
||||
logger.info(" - {}", proxy);
|
||||
}
|
||||
} else {
|
||||
logger.info("SOCKS proxy support disabled");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Selects the next proxy to use based on the configured strategy.
|
||||
*/
|
||||
@Nonnull
|
||||
public SocksProxyConfiguration.SocksProxy selectProxy() {
|
||||
if (!config.isEnabled()) {
|
||||
throw new IllegalStateException("Proxies not configured");
|
||||
}
|
||||
|
||||
List<SocksProxyConfiguration.SocksProxy> proxies = config.getProxies();
|
||||
if (proxies.isEmpty()) {
|
||||
throw new IllegalStateException("Proxies not configured");
|
||||
}
|
||||
|
||||
SocksProxyConfiguration.SocksProxy selectedProxy;
|
||||
switch (config.getStrategy()) {
|
||||
case ROUND_ROBIN:
|
||||
int index = roundRobinIndex.getAndIncrement() % proxies.size();
|
||||
selectedProxy = proxies.get(index);
|
||||
break;
|
||||
case RANDOM:
|
||||
int randomIndex = ThreadLocalRandom.current().nextInt(proxies.size());
|
||||
selectedProxy = proxies.get(randomIndex);
|
||||
break;
|
||||
default:
|
||||
selectedProxy = proxies.get(0);
|
||||
break;
|
||||
}
|
||||
|
||||
return selectedProxy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the current proxy configuration.
|
||||
*/
|
||||
public SocksProxyConfiguration getConfiguration() {
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if proxy support is enabled and proxies are available.
|
||||
*/
|
||||
public boolean isProxyEnabled() {
|
||||
return config.isEnabled() && !config.getProxies().isEmpty();
|
||||
}
|
||||
}
|
@@ -2,9 +2,8 @@ package nu.marginalia.execution;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
|
||||
import nu.marginalia.actor.ExecutorActorControlService;
|
||||
import nu.marginalia.actor.ExecutorActor;
|
||||
import nu.marginalia.actor.ExecutorActorControlService;
|
||||
|
||||
@Singleton
|
||||
public class ExecutionInit {
|
||||
@@ -22,5 +21,8 @@ public class ExecutionInit {
|
||||
actorControlService.start(ExecutorActor.PROC_CRAWLER_SPAWNER);
|
||||
actorControlService.start(ExecutorActor.PROC_INDEX_CONSTRUCTOR_SPAWNER);
|
||||
actorControlService.start(ExecutorActor.PROC_LOADER_SPAWNER);
|
||||
actorControlService.start(ExecutorActor.PROC_EXPORT_TASKS_SPAWNER);
|
||||
actorControlService.stop(ExecutorActor.PROC_NDP_SPAWNER);
|
||||
actorControlService.stop(ExecutorActor.PROC_PING_SPAWNER);
|
||||
}
|
||||
}
|
||||
|
@@ -120,6 +120,17 @@ public class ProcessSpawnerService {
|
||||
args.add("-Dsystem.serviceNode=" + System.getProperty("system.serviceNode"));
|
||||
}
|
||||
|
||||
// Add SOCKS proxy properties for crawler processes
|
||||
if (System.getProperty("crawler.socksProxy.enabled") != null) {
|
||||
args.add("-Dcrawler.socksProxy.enabled=" + System.getProperty("crawler.socksProxy.enabled"));
|
||||
}
|
||||
if (System.getProperty("crawler.socksProxy.list") != null) {
|
||||
args.add("-Dcrawler.socksProxy.list=" + System.getProperty("crawler.socksProxy.list"));
|
||||
}
|
||||
if (System.getProperty("crawler.socksProxy.strategy") != null) {
|
||||
args.add("-Dcrawler.socksProxy.strategy=" + System.getProperty("crawler.socksProxy.strategy"));
|
||||
}
|
||||
|
||||
if (Boolean.getBoolean("system.profile")) {
|
||||
// add jfr options
|
||||
args.add("-XX:+FlightRecorder");
|
||||
|
@@ -1,6 +1,6 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "me.champeau.jmh" version "0.6.6"
|
||||
id "me.champeau.jmh" version "0.7.3"
|
||||
}
|
||||
|
||||
java {
|
||||
|
@@ -1,6 +1,6 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "me.champeau.jmh" version "0.6.6"
|
||||
id "me.champeau.jmh" version "0.7.3"
|
||||
}
|
||||
|
||||
java {
|
||||
|
@@ -1,6 +1,6 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "me.champeau.jmh" version "0.6.6"
|
||||
id "me.champeau.jmh" version "0.7.3"
|
||||
}
|
||||
|
||||
java {
|
||||
|
@@ -146,12 +146,16 @@ public class CrawlerMain extends ProcessMainClass {
|
||||
System.setProperty("http.agent", WmsaHome.getUserAgent().uaString());
|
||||
|
||||
// If these aren't set properly, the JVM will hang forever on some requests
|
||||
System.setProperty("sun.net.client.defaultConnectTimeout", "30000");
|
||||
System.setProperty("sun.net.client.defaultReadTimeout", "30000");
|
||||
System.setProperty("sun.net.client.defaultConnectTimeout",
|
||||
System.getProperty("crawler.jvmConnectTimeout", "30000"));
|
||||
System.setProperty("sun.net.client.defaultReadTimeout",
|
||||
System.getProperty("crawler.jvmReadTimeout", "30000"));
|
||||
|
||||
// Set the maximum number of connections to keep alive in the connection pool
|
||||
System.setProperty("jdk.httpclient.idleTimeout", "15"); // 15 seconds
|
||||
System.setProperty("jdk.httpclient.connectionPoolSize", "256");
|
||||
System.setProperty("jdk.httpclient.idleTimeout",
|
||||
System.getProperty("crawler.httpClientIdleTimeout", "15")); // 15 seconds
|
||||
System.setProperty("jdk.httpclient.connectionPoolSize",
|
||||
System.getProperty("crawler.httpClientConnectionPoolSize", "256"));
|
||||
|
||||
// We don't want to use too much memory caching sessions for https
|
||||
System.setProperty("javax.net.ssl.sessionCacheSize", "2048");
|
||||
|
@@ -14,6 +14,8 @@ import nu.marginalia.model.body.ContentTypeLogic;
|
||||
import nu.marginalia.model.body.DocumentBodyExtractor;
|
||||
import nu.marginalia.model.body.HttpFetchResult;
|
||||
import nu.marginalia.model.crawldata.CrawlerDomainStatus;
|
||||
import nu.marginalia.proxy.SocksProxyConfiguration;
|
||||
import nu.marginalia.proxy.SocksProxyManager;
|
||||
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
|
||||
import org.apache.hc.client5.http.HttpRequestRetryStrategy;
|
||||
import org.apache.hc.client5.http.classic.HttpClient;
|
||||
@@ -49,6 +51,7 @@ import org.slf4j.MarkerFactory;
|
||||
import javax.net.ssl.SSLContext;
|
||||
import javax.net.ssl.SSLException;
|
||||
import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.SocketTimeoutException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.UnknownHostException;
|
||||
@@ -70,6 +73,7 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
|
||||
private final String userAgentIdentifier;
|
||||
|
||||
private final CookieStore cookies = new BasicCookieStore();
|
||||
private final SocksProxyManager proxyManager;
|
||||
|
||||
private static final SimpleRobotRulesParser robotsParser = new SimpleRobotRulesParser();
|
||||
private static final ContentTypeLogic contentTypeLogic = new ContentTypeLogic();
|
||||
@@ -90,29 +94,38 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
|
||||
|
||||
private CloseableHttpClient createClient() throws NoSuchAlgorithmException, KeyManagementException {
|
||||
final ConnectionConfig connectionConfig = ConnectionConfig.custom()
|
||||
.setSocketTimeout(10, TimeUnit.SECONDS)
|
||||
.setConnectTimeout(30, TimeUnit.SECONDS)
|
||||
.setSocketTimeout(Integer.getInteger("crawler.socketTimeout", 10), TimeUnit.SECONDS)
|
||||
.setConnectTimeout(Integer.getInteger("crawler.connectTimeout", 30), TimeUnit.SECONDS)
|
||||
.setValidateAfterInactivity(TimeValue.ofSeconds(5))
|
||||
.build();
|
||||
|
||||
|
||||
connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
|
||||
PoolingHttpClientConnectionManagerBuilder connectionManagerBuilder = PoolingHttpClientConnectionManagerBuilder.create()
|
||||
.setMaxConnPerRoute(2)
|
||||
.setMaxConnTotal(5000)
|
||||
.setDefaultConnectionConfig(connectionConfig)
|
||||
.setTlsSocketStrategy(new DefaultClientTlsStrategy(SSLContext.getDefault()))
|
||||
.build();
|
||||
.setTlsSocketStrategy(new DefaultClientTlsStrategy(SSLContext.getDefault()));
|
||||
|
||||
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
|
||||
.setSoLinger(TimeValue.ofSeconds(-1))
|
||||
.setSoTimeout(Timeout.ofSeconds(10))
|
||||
.build()
|
||||
);
|
||||
connectionManagerBuilder.setSocketConfigResolver(route -> {
|
||||
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
|
||||
// Configure SOCKS proxy if enabled
|
||||
if (proxyManager.isProxyEnabled()) {
|
||||
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
|
||||
InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort());
|
||||
socketConfigBuilder.setSocksProxyAddress(socksProxyAddress);
|
||||
}
|
||||
socketConfigBuilder
|
||||
.setSoTimeout(Timeout.ofSeconds(Integer.getInteger("crawler.socketTimeout", 10)))
|
||||
.setSoLinger(TimeValue.ofSeconds(-1));
|
||||
|
||||
return socketConfigBuilder.build();
|
||||
});
|
||||
|
||||
connectionManager = connectionManagerBuilder.build();
|
||||
|
||||
final RequestConfig defaultRequestConfig = RequestConfig.custom()
|
||||
.setCookieSpec(StandardCookieSpec.RELAXED)
|
||||
.setResponseTimeout(10, TimeUnit.SECONDS)
|
||||
.setConnectionRequestTimeout(5, TimeUnit.MINUTES)
|
||||
.setResponseTimeout(Integer.getInteger("crawler.responseTimeout", 10), TimeUnit.SECONDS)
|
||||
.setConnectionRequestTimeout(Integer.getInteger("crawler.connectionRequestTimeout", 5), TimeUnit.MINUTES)
|
||||
.build();
|
||||
|
||||
return HttpClients.custom()
|
||||
@@ -169,6 +182,7 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
|
||||
@Inject
|
||||
public HttpFetcherImpl(UserAgent userAgent)
|
||||
{
|
||||
this.proxyManager = new SocksProxyManager(new SocksProxyConfiguration());
|
||||
try {
|
||||
this.client = createClient();
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
@@ -181,6 +195,7 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
|
||||
}
|
||||
|
||||
public HttpFetcherImpl(String userAgent) {
|
||||
this.proxyManager = new SocksProxyManager(new SocksProxyConfiguration());
|
||||
try {
|
||||
this.client = createClient();
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
|
@@ -29,6 +29,37 @@ documents from each domain, to avoid wasting time and resources on domains that
|
||||
|
||||
On top of organic links, the crawler can use sitemaps and rss-feeds to discover new documents.
|
||||
|
||||
## Configuration
|
||||
|
||||
The crawler supports various configuration options via system properties that can be set in `system.properties`:
|
||||
|
||||
### Crawler Behavior
|
||||
- `crawler.crawlSetGrowthFactor` (default: 1.25) - Growth factor for crawl depth
|
||||
- `crawler.minUrlsPerDomain` (default: 100) - Minimum URLs to crawl per domain
|
||||
- `crawler.maxUrlsPerDomain` (default: 10000) - Maximum URLs to crawl per domain
|
||||
- `crawler.poolSize` (default: 256) - Thread pool size for concurrent crawling
|
||||
- `crawler.useVirtualThreads` (default: false) - Use virtual threads instead of platform threads
|
||||
- `crawler.maxConcurrentRequests` (default: 512) - Maximum concurrent HTTP requests
|
||||
- `crawler.maxFetchSize` (default: 33554432) - Maximum fetch size in bytes
|
||||
|
||||
### Timeout Configuration
|
||||
- `crawler.socketTimeout` (default: 10) - Socket timeout in seconds
|
||||
- `crawler.connectTimeout` (default: 30) - Connection timeout in seconds
|
||||
- `crawler.responseTimeout` (default: 10) - Response timeout in seconds
|
||||
- `crawler.connectionRequestTimeout` (default: 5) - Connection request timeout in minutes
|
||||
- `crawler.jvmConnectTimeout` (default: 30000) - JVM-level connect timeout in milliseconds
|
||||
- `crawler.jvmReadTimeout` (default: 30000) - JVM-level read timeout in milliseconds
|
||||
- `crawler.httpClientIdleTimeout` (default: 15) - HTTP client idle timeout in seconds
|
||||
- `crawler.httpClientConnectionPoolSize` (default: 256) - HTTP client connection pool size
|
||||
|
||||
### User Agent Configuration
|
||||
- `crawler.userAgentString` - Custom user agent string
|
||||
- `crawler.userAgentIdentifier` - User agent identifier
|
||||
|
||||
### Other Options
|
||||
- `links.block_mailing_lists` (default: false) - Block mailing list links
|
||||
- `ip-blocklist.disabled` (default: false) - Disable IP blocklist
|
||||
|
||||
## Central Classes
|
||||
|
||||
* [CrawlerMain](java/nu/marginalia/crawl/CrawlerMain.java) orchestrates the crawling.
|
||||
|
@@ -0,0 +1,75 @@
|
||||
package nu.marginalia.crawl.fetcher;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/**
|
||||
* Test to verify that timeout configuration properties are properly applied.
|
||||
*/
|
||||
public class HttpFetcherTimeoutConfigTest {
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
// Clear any existing system properties to ensure clean test
|
||||
System.clearProperty("crawler.socketTimeout");
|
||||
System.clearProperty("crawler.connectTimeout");
|
||||
System.clearProperty("crawler.responseTimeout");
|
||||
System.clearProperty("crawler.connectionRequestTimeout");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testDefaultTimeoutValues() {
|
||||
// Test that default values are used when no system properties are set
|
||||
HttpFetcherImpl fetcher = new HttpFetcherImpl("test-agent");
|
||||
|
||||
// Verify that the fetcher was created successfully with default timeouts
|
||||
assertNotNull(fetcher);
|
||||
|
||||
// The actual timeout values are private, but we can verify the fetcher
|
||||
// was created without exceptions, indicating the default values were used
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCustomTimeoutValues() {
|
||||
// Set custom timeout values
|
||||
System.setProperty("crawler.socketTimeout", "15");
|
||||
System.setProperty("crawler.connectTimeout", "45");
|
||||
System.setProperty("crawler.responseTimeout", "20");
|
||||
System.setProperty("crawler.connectionRequestTimeout", "3");
|
||||
|
||||
try {
|
||||
HttpFetcherImpl fetcher = new HttpFetcherImpl("test-agent");
|
||||
|
||||
// Verify that the fetcher was created successfully with custom timeouts
|
||||
assertNotNull(fetcher);
|
||||
|
||||
// The actual timeout values are private, but we can verify the fetcher
|
||||
// was created without exceptions, indicating the custom values were used
|
||||
} finally {
|
||||
// Clean up system properties
|
||||
System.clearProperty("crawler.socketTimeout");
|
||||
System.clearProperty("crawler.connectTimeout");
|
||||
System.clearProperty("crawler.responseTimeout");
|
||||
System.clearProperty("crawler.connectionRequestTimeout");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testInvalidTimeoutValues() {
|
||||
// Set invalid timeout values to test error handling
|
||||
System.setProperty("crawler.socketTimeout", "invalid");
|
||||
System.setProperty("crawler.connectTimeout", "-5");
|
||||
|
||||
try {
|
||||
// This should throw an exception for invalid timeout values
|
||||
assertThrows(Exception.class, () -> {
|
||||
HttpFetcherImpl fetcher = new HttpFetcherImpl("test-agent");
|
||||
});
|
||||
} finally {
|
||||
// Clean up system properties
|
||||
System.clearProperty("crawler.socketTimeout");
|
||||
System.clearProperty("crawler.connectTimeout");
|
||||
}
|
||||
}
|
||||
}
|
@@ -108,8 +108,10 @@ public class LiveCrawlerMain extends ProcessMainClass {
|
||||
System.setProperty("http.agent", WmsaHome.getUserAgent().uaString());
|
||||
|
||||
// If these aren't set properly, the JVM will hang forever on some requests
|
||||
System.setProperty("sun.net.client.defaultConnectTimeout", "30000");
|
||||
System.setProperty("sun.net.client.defaultReadTimeout", "30000");
|
||||
System.setProperty("sun.net.client.defaultConnectTimeout",
|
||||
System.getProperty("crawler.jvmConnectTimeout", "30000"));
|
||||
System.setProperty("sun.net.client.defaultReadTimeout",
|
||||
System.getProperty("crawler.jvmReadTimeout", "30000"));
|
||||
|
||||
// We don't want to use too much memory caching sessions for https
|
||||
System.setProperty("javax.net.ssl.sessionCacheSize", "2048");
|
||||
|
@@ -1,6 +1,8 @@
|
||||
package nu.marginalia.livecrawler.io;
|
||||
|
||||
import com.google.inject.Provider;
|
||||
import nu.marginalia.proxy.SocksProxyConfiguration;
|
||||
import nu.marginalia.proxy.SocksProxyManager;
|
||||
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
|
||||
import org.apache.hc.client5.http.classic.HttpClient;
|
||||
import org.apache.hc.client5.http.config.ConnectionConfig;
|
||||
@@ -21,6 +23,7 @@ import org.apache.hc.core5.util.Timeout;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.net.InetSocketAddress;
|
||||
import java.security.KeyManagementException;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Iterator;
|
||||
@@ -29,10 +32,12 @@ import java.util.concurrent.TimeUnit;
|
||||
public class HttpClientProvider implements Provider<HttpClient> {
|
||||
private static final HttpClient client;
|
||||
private static PoolingHttpClientConnectionManager connectionManager;
|
||||
private static final SocksProxyManager proxyManager;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(HttpClientProvider.class);
|
||||
|
||||
static {
|
||||
proxyManager = new SocksProxyManager(new SocksProxyConfiguration());
|
||||
try {
|
||||
client = createClient();
|
||||
} catch (Exception e) {
|
||||
@@ -47,18 +52,28 @@ public class HttpClientProvider implements Provider<HttpClient> {
|
||||
.setValidateAfterInactivity(TimeValue.ofSeconds(5))
|
||||
.build();
|
||||
|
||||
|
||||
connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
|
||||
PoolingHttpClientConnectionManagerBuilder connectionManagerBuilder = PoolingHttpClientConnectionManagerBuilder.create()
|
||||
.setMaxConnPerRoute(2)
|
||||
.setMaxConnTotal(50)
|
||||
.setDefaultConnectionConfig(connectionConfig)
|
||||
.build();
|
||||
.setDefaultConnectionConfig(connectionConfig);
|
||||
|
||||
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
|
||||
.setSoLinger(TimeValue.ofSeconds(-1))
|
||||
.setSoTimeout(Timeout.ofSeconds(10))
|
||||
.build()
|
||||
);
|
||||
// Configure SOCKS proxy if enabled
|
||||
connectionManagerBuilder.setSocketConfigResolver(route -> {
|
||||
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
|
||||
// Configure SOCKS proxy if enabled
|
||||
if (proxyManager.isProxyEnabled()) {
|
||||
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
|
||||
InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort());
|
||||
socketConfigBuilder.setSocksProxyAddress(socksProxyAddress);
|
||||
}
|
||||
socketConfigBuilder
|
||||
.setSoTimeout(Timeout.ofSeconds(30))
|
||||
.setSoLinger(TimeValue.ofSeconds(-1));
|
||||
|
||||
return socketConfigBuilder.build();
|
||||
});
|
||||
|
||||
connectionManager = connectionManagerBuilder.build();
|
||||
|
||||
final RequestConfig defaultRequestConfig = RequestConfig.custom()
|
||||
.setCookieSpec(StandardCookieSpec.IGNORE)
|
||||
|
@@ -1,6 +1,8 @@
|
||||
package nu.marginalia.ndp.io;
|
||||
|
||||
import com.google.inject.Provider;
|
||||
import nu.marginalia.proxy.SocksProxyConfiguration;
|
||||
import nu.marginalia.proxy.SocksProxyManager;
|
||||
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
|
||||
import org.apache.hc.client5.http.classic.HttpClient;
|
||||
import org.apache.hc.client5.http.config.ConnectionConfig;
|
||||
@@ -21,6 +23,7 @@ import org.apache.hc.core5.util.Timeout;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.net.InetSocketAddress;
|
||||
import java.security.KeyManagementException;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Iterator;
|
||||
@@ -29,10 +32,12 @@ import java.util.concurrent.TimeUnit;
|
||||
public class HttpClientProvider implements Provider<HttpClient> {
|
||||
private static final HttpClient client;
|
||||
private static PoolingHttpClientConnectionManager connectionManager;
|
||||
private static final SocksProxyManager proxyManager;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(HttpClientProvider.class);
|
||||
|
||||
static {
|
||||
proxyManager = new SocksProxyManager(new SocksProxyConfiguration());
|
||||
try {
|
||||
client = createClient();
|
||||
} catch (Exception e) {
|
||||
@@ -47,18 +52,27 @@ public class HttpClientProvider implements Provider<HttpClient> {
|
||||
.setValidateAfterInactivity(TimeValue.ofSeconds(5))
|
||||
.build();
|
||||
|
||||
|
||||
connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
|
||||
PoolingHttpClientConnectionManagerBuilder connectionManagerBuilder = PoolingHttpClientConnectionManagerBuilder.create()
|
||||
.setMaxConnPerRoute(2)
|
||||
.setMaxConnTotal(50)
|
||||
.setDefaultConnectionConfig(connectionConfig)
|
||||
.build();
|
||||
.setDefaultConnectionConfig(connectionConfig);
|
||||
|
||||
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
|
||||
.setSoLinger(TimeValue.ofSeconds(-1))
|
||||
.setSoTimeout(Timeout.ofSeconds(10))
|
||||
.build()
|
||||
);
|
||||
connectionManagerBuilder.setSocketConfigResolver(route -> {
|
||||
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
|
||||
// Configure SOCKS proxy if enabled
|
||||
if (proxyManager.isProxyEnabled()) {
|
||||
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
|
||||
InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort());
|
||||
socketConfigBuilder.setSocksProxyAddress(socksProxyAddress);
|
||||
}
|
||||
socketConfigBuilder
|
||||
.setSoTimeout(Timeout.ofSeconds(10))
|
||||
.setSoLinger(TimeValue.ofSeconds(-1));
|
||||
|
||||
return socketConfigBuilder.build();
|
||||
});
|
||||
|
||||
connectionManager = connectionManagerBuilder.build();
|
||||
|
||||
final RequestConfig defaultRequestConfig = RequestConfig.custom()
|
||||
.setCookieSpec(StandardCookieSpec.IGNORE)
|
||||
|
@@ -1,6 +1,8 @@
|
||||
package nu.marginalia.ping.io;
|
||||
|
||||
import com.google.inject.Provider;
|
||||
import nu.marginalia.proxy.SocksProxyConfiguration;
|
||||
import nu.marginalia.proxy.SocksProxyManager;
|
||||
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
|
||||
import org.apache.hc.client5.http.classic.HttpClient;
|
||||
import org.apache.hc.client5.http.config.ConnectionConfig;
|
||||
@@ -27,6 +29,7 @@ import org.slf4j.LoggerFactory;
|
||||
import javax.net.ssl.SSLContext;
|
||||
import javax.net.ssl.TrustManager;
|
||||
import javax.net.ssl.X509TrustManager;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.security.KeyManagementException;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.security.cert.X509Certificate;
|
||||
@@ -36,10 +39,12 @@ import java.util.concurrent.TimeUnit;
|
||||
public class HttpClientProvider implements Provider<HttpClient> {
|
||||
private static final HttpClient client;
|
||||
private static PoolingHttpClientConnectionManager connectionManager;
|
||||
private static final SocksProxyManager proxyManager;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(HttpClientProvider.class);
|
||||
|
||||
static {
|
||||
proxyManager = new SocksProxyManager(new SocksProxyConfiguration());
|
||||
try {
|
||||
client = createClient();
|
||||
} catch (Exception e) {
|
||||
@@ -85,19 +90,29 @@ public class HttpClientProvider implements Provider<HttpClient> {
|
||||
SSLContext sslContext = SSLContextBuilder.create().build();
|
||||
sslContext.init(null, new TrustManager[]{trustMeBro}, null);
|
||||
|
||||
connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
|
||||
PoolingHttpClientConnectionManagerBuilder connectionManagerBuilder = PoolingHttpClientConnectionManagerBuilder.create()
|
||||
.setMaxConnPerRoute(2)
|
||||
.setMaxConnTotal(50)
|
||||
.setDefaultConnectionConfig(connectionConfig)
|
||||
.setTlsSocketStrategy(
|
||||
new DefaultClientTlsStrategy(sslContext, NoopHostnameVerifier.INSTANCE))
|
||||
.build();
|
||||
new DefaultClientTlsStrategy(sslContext, NoopHostnameVerifier.INSTANCE));
|
||||
|
||||
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
|
||||
.setSoLinger(TimeValue.ofSeconds(-1))
|
||||
.setSoTimeout(Timeout.ofSeconds(10))
|
||||
.build()
|
||||
);
|
||||
connectionManagerBuilder.setSocketConfigResolver(route -> {
|
||||
SocketConfig.Builder socketConfigBuilder = SocketConfig.custom();
|
||||
// Configure SOCKS proxy if enabled
|
||||
if (proxyManager.isProxyEnabled()) {
|
||||
SocksProxyConfiguration.SocksProxy selectedProxy = proxyManager.selectProxy();
|
||||
InetSocketAddress socksProxyAddress = new InetSocketAddress(selectedProxy.getHost(), selectedProxy.getPort());
|
||||
socketConfigBuilder.setSocksProxyAddress(socksProxyAddress);
|
||||
}
|
||||
socketConfigBuilder
|
||||
.setSoTimeout(Timeout.ofSeconds(10))
|
||||
.setSoLinger(TimeValue.ofSeconds(-1));
|
||||
|
||||
return socketConfigBuilder.build();
|
||||
});
|
||||
|
||||
connectionManager = connectionManagerBuilder.build();
|
||||
|
||||
final RequestConfig defaultRequestConfig = RequestConfig.custom()
|
||||
.setCookieSpec(StandardCookieSpec.IGNORE)
|
||||
|
@@ -91,6 +91,11 @@ dependencies {
|
||||
testImplementation dependencies.create(libs.spark.get())
|
||||
}
|
||||
|
||||
interface InjectedExecOps {
|
||||
@Inject //@javax.inject.Inject
|
||||
ExecOperations getExecOps()
|
||||
}
|
||||
|
||||
task compileTailwind {
|
||||
def inputFile = file('tailwind/globals.css')
|
||||
def configFile = file('tailwind/tailwind.config.js')
|
||||
@@ -103,7 +108,8 @@ task compileTailwind {
|
||||
outputs.file outputFile
|
||||
|
||||
doLast {
|
||||
exec {
|
||||
def injected = project.objects.newInstance(InjectedExecOps)
|
||||
injected.execOps.exec {
|
||||
// If you're getting a build error like 'npm error could not determine executable to run'
|
||||
// pointing you here, you need to run `npm install -D tailwindcss`
|
||||
workingDir projectDir
|
||||
|
@@ -93,7 +93,7 @@ public class IndexService extends SparkService {
|
||||
|
||||
@MqRequest(endpoint="FIRST-BOOT")
|
||||
public void setUpDefaultActors(String message) throws Exception {
|
||||
logger.info("Initializing default actors");
|
||||
eventLog.logEvent("FIRST-BOOT", "Initializing default actors");
|
||||
|
||||
executionInit.initDefaultActors();
|
||||
}
|
||||
|
198
doc/socks-proxy-configuration.md
Normal file
198
doc/socks-proxy-configuration.md
Normal file
@@ -0,0 +1,198 @@
|
||||
# SOCKS Proxy Support for Crawlers
|
||||
|
||||
This document describes how to configure and use SOCKS proxy support in Marginalia's crawler processes to distribute IP footprint across multiple remote servers.
|
||||
|
||||
## Overview
|
||||
|
||||
The SOCKS proxy feature allows crawlers to route their HTTP requests through SOCKS proxies running on remote servers. This helps distribute the IP footprint and avoid rate limiting or blocking from target websites.
|
||||
|
||||
## Configuration
|
||||
|
||||
SOCKS proxy support is configured via system properties. The following properties are available:
|
||||
|
||||
### Basic Configuration
|
||||
|
||||
- `crawler.socksProxy.enabled` (default: `false`)
|
||||
- Set to `true` to enable SOCKS proxy support
|
||||
|
||||
- `crawler.socksProxy.list` (default: empty)
|
||||
- Comma-separated list of SOCKS proxy servers
|
||||
- Format: `host:port` or `host:port:username:password`
|
||||
- Example: `1.1.1.5:1080,1.1.1.10:1080,1.1.1.15:1080`
|
||||
|
||||
- `crawler.socksProxy.strategy` (default: `ROUND_ROBIN`)
|
||||
- Proxy selection strategy: `ROUND_ROBIN` or `RANDOM`
|
||||
|
||||
### Example Configuration
|
||||
|
||||
To enable SOCKS proxy with 3 remote servers using round-robin selection, set the
|
||||
following in `conf/properties/system.properties`, or pass them as system properties to
|
||||
the java processes via `-D`:
|
||||
|
||||
```bash
|
||||
crawler.socksProxy.enabled=true
|
||||
crawler.socksProxy.list=1.1.1.5:1080,1.1.1.10:1080,1.1.1.15:1080
|
||||
crawler.socksProxy.strategy=ROUND_ROBIN
|
||||
```
|
||||
|
||||
For authenticated proxies:
|
||||
|
||||
```bash
|
||||
crawler.socksProxy.enabled=true
|
||||
crawler.socksProxy.list=1.1.1.5:1080:user1:pass1,1.1.1.10:1080:user2:pass2,1.1.1.15:1080:user3:pass3
|
||||
crawler.socksProxy.strategy=RANDOM
|
||||
```
|
||||
|
||||
## Setting Up SOCKS Proxies
|
||||
|
||||
### Using Dante's sockd (SOCKS5 daemon)
|
||||
|
||||
On each remote server (1.1.1.5, 1.1.1.10, 1.1.1.15), install and configure sockd:
|
||||
|
||||
1. Install sockd (varies by OS):
|
||||
```bash
|
||||
# Ubuntu/Debian
|
||||
sudo apt-get install dante-server
|
||||
|
||||
# CentOS/RHEL
|
||||
sudo yum install dante
|
||||
```
|
||||
|
||||
2. Configure sockd (`/etc/sockd.conf`; the Debian/Ubuntu `dante-server` package uses `/etc/danted.conf`):
|
||||
```
|
||||
# Listen on port 1080
|
||||
internal: 0.0.0.0 port = 1080
|
||||
|
||||
# Network interface used for outgoing (proxied) connections
|
||||
external: eth0
|
||||
|
||||
# User IDs the daemon runs as (these are not authentication settings)
|
||||
user.privileged: root
|
||||
user.unprivileged: nobody
|
||||
|
||||
# Allow client connections from your crawler host
|
||||
client pass {
|
||||
from: YOUR_CRAWLER_HOST_IP/32 to: 0.0.0.0/0
|
||||
log: connect disconnect
|
||||
}
|
||||
|
||||
# Allow outgoing connections
|
||||
pass {
|
||||
from: 0.0.0.0/0 to: 0.0.0.0/0
|
||||
protocol: tcp udp
|
||||
log: connect disconnect
|
||||
}
|
||||
```
|
||||
|
||||
3. Start sockd (on Debian/Ubuntu the service is named `danted` instead of `sockd`):
|
||||
```bash
|
||||
sudo systemctl start sockd
|
||||
sudo systemctl enable sockd
|
||||
```
|
||||
|
||||
### Alternative: Using SSH Tunnels
|
||||
|
||||
You can also use SSH tunnels as SOCKS proxies:
|
||||
|
||||
```bash
|
||||
# On your local machine, create SSH tunnels to remote servers
|
||||
ssh -D 1080 -N user@1.1.1.5
|
||||
ssh -D 1081 -N user@1.1.1.10
|
||||
ssh -D 1082 -N user@1.1.1.15
|
||||
```
|
||||
|
||||
Then configure:
|
||||
```bash
|
||||
-Dcrawler.socksProxy.list=localhost:1080,localhost:1081,localhost:1082
|
||||
```
|
||||
|
||||
## Affected Processes
|
||||
|
||||
SOCKS proxy support is integrated into the following crawler processes:
|
||||
|
||||
1. **Main Crawler** (`crawling-process`)
|
||||
- Primary web crawling functionality
|
||||
- Uses `HttpFetcherImpl` with SOCKS proxy support
|
||||
|
||||
2. **Live Crawler** (`live-crawling-process`)
|
||||
- Real-time crawling for live updates
|
||||
- Uses `HttpClientProvider` with SOCKS proxy support
|
||||
|
||||
3. **Ping Process** (`ping-process`)
|
||||
- Domain availability checking
|
||||
- Uses `HttpClientProvider` with SOCKS proxy support
|
||||
|
||||
4. **New Domain Process** (`new-domain-process`)
|
||||
- New domain discovery and validation
|
||||
- Uses `HttpClientProvider` with SOCKS proxy support
|
||||
|
||||
## Implementation Details
|
||||
|
||||
### Proxy Selection Strategies
|
||||
|
||||
- **ROUND_ROBIN**: Cycles through proxies in order, ensuring even distribution
|
||||
- **RANDOM**: Randomly selects a proxy for each request
|
||||
|
||||
### Connection Management
|
||||
|
||||
- Each HTTP client maintains its own connection pool
|
||||
- SOCKS proxy connections are established per request
|
||||
- Connection timeouts and retry logic remain unchanged
|
||||
- SSL/TLS connections work transparently through SOCKS proxies
|
||||
|
||||
### Logging
|
||||
|
||||
The system logs proxy selection and configuration:
|
||||
- Startup logs show enabled proxies and strategy
|
||||
- Debug logs show which proxy is selected for each request
|
||||
- Error logs indicate proxy connection failures
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **Proxy Connection Failures**
|
||||
- Verify proxy servers are running and accessible
|
||||
- Check firewall rules allow connections to proxy ports
|
||||
- Verify proxy authentication credentials if required
|
||||
|
||||
2. **No Traffic Through Proxies**
|
||||
- Ensure `crawler.socksProxy.enabled=true`
|
||||
- Verify proxy list is correctly formatted
|
||||
- Check logs for proxy selection messages
|
||||
|
||||
3. **Performance Issues**
|
||||
- SOCKS proxies add latency - consider geographic proximity
|
||||
- Monitor proxy server resources and bandwidth
|
||||
- Adjust connection pool sizes if needed
|
||||
|
||||
### Testing Proxy Configuration
|
||||
|
||||
You can test your proxy configuration using curl:
|
||||
|
||||
```bash
|
||||
# Test individual proxies
|
||||
curl --socks5 1.1.1.5:1080 http://httpbin.org/ip
|
||||
curl --socks5 1.1.1.10:1080 http://httpbin.org/ip
|
||||
curl --socks5 1.1.1.15:1080 http://httpbin.org/ip
|
||||
|
||||
# Test with authentication
|
||||
curl --socks5 user:pass@1.1.1.5:1080 http://httpbin.org/ip
|
||||
```
|
||||
|
||||
## Security Considerations
|
||||
|
||||
- Use authenticated proxies when possible
|
||||
- Ensure proxy servers are properly secured
|
||||
- Monitor proxy usage for unusual patterns
|
||||
- Consider using VPNs or dedicated proxy services for production use
|
||||
- Rotate proxy credentials regularly
|
||||
|
||||
## Performance Impact
|
||||
|
||||
- **Latency**: SOCKS proxies add network latency (typically 10-100ms)
|
||||
- **Bandwidth**: Additional bandwidth usage for proxy connections
|
||||
- **Reliability**: Proxy failures can cause request failures
|
||||
- **Throughput**: May reduce overall crawling throughput
|
||||
|
||||
Consider these factors when planning your proxy infrastructure and crawling targets.
|
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Binary file not shown.
4
gradle/wrapper/gradle-wrapper.properties
vendored
4
gradle/wrapper/gradle-wrapper.properties
vendored
@@ -1,5 +1,7 @@
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.14-bin.zip
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-9.1.0-bin.zip
|
||||
networkTimeout=10000
|
||||
validateDistributionUrl=true
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
|
47
gradlew
vendored
47
gradlew
vendored
@@ -15,6 +15,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
##############################################################################
|
||||
#
|
||||
@@ -55,7 +57,7 @@
|
||||
# Darwin, MinGW, and NonStop.
|
||||
#
|
||||
# (3) This script is generated from the Groovy template
|
||||
# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||
# within the Gradle project.
|
||||
#
|
||||
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||
@@ -80,13 +82,11 @@ do
|
||||
esac
|
||||
done
|
||||
|
||||
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
|
||||
|
||||
APP_NAME="Gradle"
|
||||
# This is normally unused
|
||||
# shellcheck disable=SC2034
|
||||
APP_BASE_NAME=${0##*/}
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
|
||||
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD=maximum
|
||||
@@ -114,7 +114,7 @@ case "$( uname )" in #(
|
||||
NONSTOP* ) nonstop=true ;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
CLASSPATH="\\\"\\\""
|
||||
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
@@ -133,22 +133,29 @@ location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD=java
|
||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
if ! command -v java >/dev/null 2>&1
|
||||
then
|
||||
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||
case $MAX_FD in #(
|
||||
max*)
|
||||
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
|
||||
# shellcheck disable=SC2039,SC3045
|
||||
MAX_FD=$( ulimit -H -n ) ||
|
||||
warn "Could not query maximum file descriptor limit"
|
||||
esac
|
||||
case $MAX_FD in #(
|
||||
'' | soft) :;; #(
|
||||
*)
|
||||
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
|
||||
# shellcheck disable=SC2039,SC3045
|
||||
ulimit -n "$MAX_FD" ||
|
||||
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||
esac
|
||||
@@ -193,18 +200,28 @@ if "$cygwin" || "$msys" ; then
|
||||
done
|
||||
fi
|
||||
|
||||
# Collect all arguments for the java command;
|
||||
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
|
||||
# shell script including quotes and variable substitutions, so put them in
|
||||
# double quotes to make sure that they get re-expanded; and
|
||||
# * put everything else in single quotes, so that it's not re-expanded.
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||
|
||||
# Collect all arguments for the java command:
|
||||
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
|
||||
# and any embedded shellness will be escaped.
|
||||
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
|
||||
# treated as '${Hostname}' itself on the command line.
|
||||
|
||||
set -- \
|
||||
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||
-classpath "$CLASSPATH" \
|
||||
org.gradle.wrapper.GradleWrapperMain \
|
||||
-jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \
|
||||
"$@"
|
||||
|
||||
# Stop when "xargs" is not available.
|
||||
if ! command -v xargs >/dev/null 2>&1
|
||||
then
|
||||
die "xargs is not available"
|
||||
fi
|
||||
|
||||
# Use "xargs" to parse quoted args.
|
||||
#
|
||||
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
||||
|
41
gradlew.bat
vendored
41
gradlew.bat
vendored
@@ -13,8 +13,10 @@
|
||||
@rem See the License for the specific language governing permissions and
|
||||
@rem limitations under the License.
|
||||
@rem
|
||||
@rem SPDX-License-Identifier: Apache-2.0
|
||||
@rem
|
||||
|
||||
@if "%DEBUG%" == "" @echo off
|
||||
@if "%DEBUG%"=="" @echo off
|
||||
@rem ##########################################################################
|
||||
@rem
|
||||
@rem Gradle startup script for Windows
|
||||
@@ -25,7 +27,8 @@
|
||||
if "%OS%"=="Windows_NT" setlocal
|
||||
|
||||
set DIRNAME=%~dp0
|
||||
if "%DIRNAME%" == "" set DIRNAME=.
|
||||
if "%DIRNAME%"=="" set DIRNAME=.
|
||||
@rem This is normally unused
|
||||
set APP_BASE_NAME=%~n0
|
||||
set APP_HOME=%DIRNAME%
|
||||
|
||||
@@ -40,13 +43,13 @@ if defined JAVA_HOME goto findJavaFromJavaHome
|
||||
|
||||
set JAVA_EXE=java.exe
|
||||
%JAVA_EXE% -version >NUL 2>&1
|
||||
if "%ERRORLEVEL%" == "0" goto execute
|
||||
if %ERRORLEVEL% equ 0 goto execute
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
echo. 1>&2
|
||||
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
|
||||
echo. 1>&2
|
||||
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
|
||||
echo location of your Java installation. 1>&2
|
||||
|
||||
goto fail
|
||||
|
||||
@@ -56,32 +59,34 @@ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||
|
||||
if exist "%JAVA_EXE%" goto execute
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
echo. 1>&2
|
||||
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
|
||||
echo. 1>&2
|
||||
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
|
||||
echo location of your Java installation. 1>&2
|
||||
|
||||
goto fail
|
||||
|
||||
:execute
|
||||
@rem Setup the command line
|
||||
|
||||
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||
set CLASSPATH=
|
||||
|
||||
|
||||
@rem Execute Gradle
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %*
|
||||
|
||||
:end
|
||||
@rem End local scope for the variables with windows NT shell
|
||||
if "%ERRORLEVEL%"=="0" goto mainEnd
|
||||
if %ERRORLEVEL% equ 0 goto mainEnd
|
||||
|
||||
:fail
|
||||
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||
rem the _cmd.exe /c_ return code!
|
||||
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
||||
exit /b 1
|
||||
set EXIT_CODE=%ERRORLEVEL%
|
||||
if %EXIT_CODE% equ 0 set EXIT_CODE=1
|
||||
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
|
||||
exit /b %EXIT_CODE%
|
||||
|
||||
:mainEnd
|
||||
if "%OS%"=="Windows_NT" endlocal
|
||||
|
2
third-party/commons-codec/build.gradle
vendored
2
third-party/commons-codec/build.gradle
vendored
@@ -1,6 +1,6 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "me.champeau.jmh" version "0.6.6"
|
||||
id "me.champeau.jmh" version "0.7.3"
|
||||
}
|
||||
|
||||
java {
|
||||
|
Reference in New Issue
Block a user