mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00

Compare commits: deploy-000 ... deploy-000 (4 commits)

94d4d2edb7
7ae19a92ba
56d14e56d7
a557c7ae7f
@@ -50,12 +50,18 @@ public class LiveCrawlActor extends RecordActorPrototype {
                 yield new Monitor("-");
             }
             case Monitor(String feedsHash) -> {
+                // Sleep initially in case this is during start-up
                 for (;;) {
+                    try {
+                        Thread.sleep(Duration.ofMinutes(15));
                         String currentHash = feedsClient.getFeedDataHash();
                         if (!Objects.equals(currentHash, feedsHash)) {
                             yield new LiveCrawl(currentHash);
                         }
-                    Thread.sleep(Duration.ofMinutes(15));
+                    }
+                    catch (RuntimeException ex) {
+                        logger.error("Failed to fetch feed data hash");
+                    }
                 }
             }
             case LiveCrawl(String feedsHash, long msgId) when msgId < 0 -> {
@@ -59,12 +59,6 @@ public class FeedsClient {
                 .forEachRemaining(rsp -> consumer.accept(rsp.getDomain(), new ArrayList<>(rsp.getUrlList())));
     }
 
-    public record UpdatedDomain(String domain, List<String> urls) {
-        public UpdatedDomain(RpcUpdatedLinksResponse rsp) {
-            this(rsp.getDomain(), new ArrayList<>(rsp.getUrlList()));
-        }
-    }
-
     /** Get the hash of the feed data, for identifying when the data has been updated */
     public String getFeedDataHash() {
         return channelPool.call(FeedApiGrpc.FeedApiBlockingStub::getFeedDataHash)
@@ -46,6 +46,7 @@ message RpcFeed {
     string feedUrl = 3;
     string updated = 4;
     repeated RpcFeedItem items = 5;
+    int64 fetchTimestamp = 6;
 }
 
 message RpcFeedItem {
@@ -12,9 +12,11 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
+import java.nio.file.attribute.PosixFileAttributes;
 import java.security.MessageDigest;
+import java.time.Instant;
 import java.util.Base64;
@@ -209,4 +211,20 @@ public class FeedDb {
 
         reader.getLinksUpdatedSince(since, consumer);
     }
+
+    public Instant getFetchTime() {
+        if (!Files.exists(readerDbPath)) {
+            return Instant.ofEpochMilli(0);
+        }
+
+        try {
+            return Files.readAttributes(readerDbPath, PosixFileAttributes.class)
+                    .creationTime()
+                    .toInstant();
+        }
+        catch (IOException ex) {
+            logger.error("Failed to read the creation time of {}", readerDbPath);
+            return Instant.ofEpochMilli(0);
+        }
+    }
 }
@@ -107,8 +107,7 @@ public class FeedsGrpcService extends FeedApiGrpc.FeedApiImplBase implements Dis
 
     @Override
     public void getFeed(RpcDomainId request,
-                        StreamObserver<RpcFeed> responseObserver)
-    {
+                        StreamObserver<RpcFeed> responseObserver) {
         if (!feedDb.isEnabled()) {
             responseObserver.onError(new IllegalStateException("Feed database is disabled on this node"));
             return;
@@ -126,7 +125,8 @@ public class FeedsGrpcService extends FeedApiGrpc.FeedApiImplBase implements Dis
                 .setDomainId(request.getDomainId())
                 .setDomain(domainName.get().toString())
                 .setFeedUrl(feedItems.feedUrl())
-                .setUpdated(feedItems.updated());
+                .setUpdated(feedItems.updated())
+                .setFetchTimestamp(feedDb.getFetchTime().toEpochMilli());
 
         for (var item : feedItems.items()) {
             retB.addItemsBuilder()
@@ -4,6 +4,7 @@ import crawlercommons.robots.SimpleRobotRules;
 import crawlercommons.robots.SimpleRobotRulesParser;
 import nu.marginalia.WmsaHome;
 import nu.marginalia.crawl.fetcher.HttpFetcherImpl;
+import nu.marginalia.crawl.logic.DomainLocks;
 import nu.marginalia.crawl.retreival.CrawlDelayTimer;
 import nu.marginalia.db.DbDomainQueries;
 import nu.marginalia.db.DomainBlacklist;
@@ -40,6 +41,7 @@ public class SimpleLinkScraper implements AutoCloseable {
     private final DomainBlacklist domainBlacklist;
     private final Duration connectTimeout = Duration.ofSeconds(10);
     private final Duration readTimeout = Duration.ofSeconds(10);
+    private final DomainLocks domainLocks = new DomainLocks();
 
     public SimpleLinkScraper(LiveCrawlDataSet dataSet,
                              DbDomainQueries domainQueries,
@@ -65,7 +67,9 @@ public class SimpleLinkScraper implements AutoCloseable {
                 .connectTimeout(connectTimeout)
                 .followRedirects(HttpClient.Redirect.NEVER)
                 .version(HttpClient.Version.HTTP_2)
-                .build()) {
+                .build();
+             DomainLocks.DomainLock lock = domainLocks.lockDomain(domain) // throttle concurrent access per domain; do not remove
+        ) {
 
             EdgeUrl rootUrl = domain.toRootUrlHttps();
@@ -13,11 +13,12 @@ class ServiceConfig:
     docker_name: str
     instances: int | None
     deploy_tier: int
+    groups: Set[str]
 
 @dataclass
 class DeploymentPlan:
     services_to_build: List[str]
     instances_to_hold: Set[str]
+    instances_to_deploy: Set[str]
 
 @dataclass
 class DockerContainer:
@@ -73,16 +74,24 @@ def parse_deployment_tags(
     instances_to_hold = set()
 
     available_services = set(service_config.keys())
+    available_groups = set()
+
+    partitions = set()
+
+    for service in service_config.values():
+        available_groups = available_groups | service.groups
 
     for tag in [tag.strip() for tag in tag_messages]:
+        if tag.startswith('partition:'):
+            for p in tag[10:].strip().split(','):
+                partitions.add(int(p))
         if tag.startswith('deploy:'):
             parts = tag[7:].strip().split(',')
 
             for part in parts:
                 part = part.strip()
-                if part.startswith('-'):
+                if part == 'all':
+                    services_to_build.update(available_services)
+                elif part.startswith('-'):
                     service = part[1:]
                     if not service in available_services:
                         raise ValueError(f"Unknown service {service}")
@@ -94,11 +103,20 @@ def parse_deployment_tags(
                         raise ValueError(f"Unknown service {service}")
 
                     services_to_build.add(service)
+                else:
+                    group = part
+                    if not group in available_groups:
+                        raise ValueError(f"Unknown service group {group}")
+                    for name, service in service_config.items():
+                        if group in service.groups:
+                            services_to_build.add(name)
 
         elif tag.startswith('hold:'):
             instances = tag[5:].strip().split(',')
             instances_to_hold.update(i.strip() for i in instances if i.strip())
 
+    print(partitions)
+
     # Remove any explicitly excluded services
     services_to_build = services_to_build - services_to_exclude
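Taken together, the two hunks above extend the deploy-tag grammar: `deploy:` now accepts `all` and group names alongside plain service names, `-service` still excludes, and a new `partition:` tag limits deployment to specific instance numbers. A minimal usage sketch, assuming `parse_deployment_tags` and the dataclasses from this file are importable (the import path is hypothetical, and the two-entry configuration is a trimmed-down stand-in for the real table):

    from deployment import ServiceConfig, parse_deployment_tags  # hypothetical import path

    service_config = {
        'api': ServiceConfig(gradle_target=':code:services-application:api-service:docker',
                             docker_name='api-service', instances=2, deploy_tier=1,
                             groups={"all", "core"}),
        'search': ServiceConfig(gradle_target=':code:services-application:search-service:docker',
                                docker_name='search-service', instances=2, deploy_tier=2,
                                groups={"all", "frontend", "core"}),
    }

    # A tag like "deploy:core,-search partition:2" arrives as two tag
    # messages after splitting on spaces: "core" expands to both services,
    # "-search" then excludes one, and partition 2 keeps only *-2 instances.
    plan = parse_deployment_tags(["deploy:core,-search", "partition:2"], service_config)
    print(plan.services_to_build)                      # ['api']
    print([c.name for c in plan.instances_to_deploy])  # ['api-service-2']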
@@ -107,9 +125,32 @@ def parse_deployment_tags(
     if invalid_services:
         raise ValueError(f"Unknown services specified: {invalid_services}")
 
+    to_deploy = list()
+    for service in services_to_build:
+        config = service_config[service]
+
+        if config.instances == None:
+            if config.docker_name in instances_to_hold:
+                continue
+            container = DockerContainer(config.docker_name, 0, config)
+
+            if len(partitions) == 0 or 0 in partitions:
+                to_deploy.append(container)
+        else:
+            for instance in range(1,config.instances + 1):
+                if config.docker_name in instances_to_hold:
+                    continue
+
+                container_name = f"{config.docker_name}-{instance}"
+                if container_name in instances_to_hold:
+                    continue
+
+                if len(partitions) == 0 or instance in partitions:
+                    to_deploy.append(DockerContainer(container_name, instance, config))
+
     return DeploymentPlan(
         services_to_build=sorted(list(services_to_build)),
-        instances_to_hold=instances_to_hold
+        instances_to_hold=instances_to_hold,
+        instances_to_deploy=sorted(to_deploy, key = lambda c : c.deploy_key())
     )
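The instance filter applied in both branches above is the same predicate: an empty partition set means "deploy everywhere", a singleton service (instances=None) counts as instance 0, and numbered instances match their own suffix. A standalone restatement of that check (the helper name is ours, not the script's):

    def matches_partition(instance: int, partitions: set) -> bool:
        # Same check as in parse_deployment_tags above: no partition tag
        # means deploy everything; otherwise the instance must be listed.
        return len(partitions) == 0 or instance in partitions

    assert matches_partition(3, set())        # no partition: tag given
    assert matches_partition(3, {3, 4})       # "partition:3,4" covers instance 3
    assert not matches_partition(1, {3, 4})   # instance 1 is filtered out
    assert matches_partition(0, {0})          # singleton services use instance 0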
@@ -141,52 +182,27 @@ def deploy_container(container: DockerContainer) -> None:
         raise BuildError(container, return_code)
 
 def deploy_services(containers: List[str]) -> None:
     cwd = os.getcwd()
     print(f"Deploying {containers}")
     os.chdir(docker_dir)
 
     for container in containers:
         deploy_container(container)
 
 def build_and_deploy(plan: DeploymentPlan, service_config: Dict[str, ServiceConfig]):
     """Execute the deployment plan"""
-    for service in plan.services_to_build:
-        config = service_config[service]
-        print(f"Building {service}:")
-        run_gradle_build(service, config.gradle_target)
+    run_gradle_build([service_config[service].gradle_target for service in plan.services_to_build])
 
-    to_deploy = list()
-    for service in plan.services_to_build:
-        config = service_config[service]
-
-        if config.instances == None:
-            if config.docker_name in plan.instances_to_hold:
-                continue
-            container = DockerContainer(config.docker_name, 0, config)
-
-            to_deploy.append(container)
-        else:
-            for instance in range(1,config.instances + 1):
-                if config.docker_name in plan.instances_to_hold:
-                    continue
-
-                container_name = f"{config.docker_name}-{instance}"
-                if container_name in plan.instances_to_hold:
-                    continue
-                to_deploy.append(DockerContainer(container_name, instance, config))
-    to_deploy = sorted(to_deploy, key = lambda c : c.deploy_key())
-
-    deploy_services(to_deploy)
+    deploy_services(plan.instances_to_deploy)
 
 
-def run_gradle_build(service: str, target: str) -> None:
+def run_gradle_build(targets: List[str]) -> None:
     """
-    Run a Gradle build for the specified service and target.
+    Run a Gradle build for the specified targets.
     Raises BuildError if the build fails.
     """
-    print(f"\nBuilding {service} with target {target}")
+    print(f"\nBuilding targets {targets}")
     process = subprocess.Popen(
-        ['./gradlew', '-q', target],
+        ['./gradlew', '-q'] + targets,
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         text=True
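The build_and_deploy rewrite above trades one Gradle invocation per service for a single batched invocation, so Gradle configures the project once and can schedule the docker targets itself, while the plan's precomputed instances_to_deploy replaces the inline container loop. A condensed sketch of the resulting build step (the two target strings are taken from the service table below; the error handling is an approximation of the script's BuildError path):

    import subprocess

    targets = [
        ':code:services-application:api-service:docker',
        ':code:services-core:query-service:docker',
    ]

    # One ./gradlew process for all targets, as in the new run_gradle_build;
    # output is streamed line by line as it is produced.
    process = subprocess.Popen(['./gradlew', '-q'] + targets,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               text=True)
    for line in process.stdout:
        print(line, end='')
    if process.wait() != 0:
        raise RuntimeError("gradle build failed")  # the script raises BuildError here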
@@ -212,66 +228,82 @@ if __name__ == '__main__':
             gradle_target=':code:services-application:search-service:docker',
             docker_name='search-service',
             instances=2,
-            deploy_tier=2
+            deploy_tier=2,
+            groups={"all", "frontend", "core"}
         ),
         'api': ServiceConfig(
             gradle_target=':code:services-application:api-service:docker',
             docker_name='api-service',
             instances=2,
-            deploy_tier=1
+            deploy_tier=1,
+            groups={"all", "core"}
         ),
         'assistant': ServiceConfig(
             gradle_target=':code:services-core:assistant-service:docker',
             docker_name='assistant-service',
             instances=2,
-            deploy_tier=2
+            deploy_tier=2,
+            groups={"all", "core"}
         ),
         'explorer': ServiceConfig(
             gradle_target=':code:services-application:explorer-service:docker',
             docker_name='explorer-service',
             instances=None,
-            deploy_tier=1
+            deploy_tier=1,
+            groups={"all", "extra"}
         ),
         'dating': ServiceConfig(
             gradle_target=':code:services-application:dating-service:docker',
             docker_name='dating-service',
             instances=None,
-            deploy_tier=1
+            deploy_tier=1,
+            groups={"all", "extra"}
         ),
         'index': ServiceConfig(
             gradle_target=':code:services-core:index-service:docker',
             docker_name='index-service',
             instances=10,
-            deploy_tier=3
+            deploy_tier=3,
+            groups={"all", "index"}
         ),
         'executor': ServiceConfig(
             gradle_target=':code:services-core:executor-service:docker',
             docker_name='executor-service',
             instances=10,
-            deploy_tier=3
+            deploy_tier=3,
+            groups={"all", "executor"}
         ),
         'control': ServiceConfig(
             gradle_target=':code:services-core:control-service:docker',
             docker_name='control-service',
             instances=None,
-            deploy_tier=0
+            deploy_tier=0,
+            groups={"all", "core"}
         ),
         'query': ServiceConfig(
             gradle_target=':code:services-core:query-service:docker',
             docker_name='query-service',
             instances=2,
-            deploy_tier=2
+            deploy_tier=2,
+            groups={"all", "query"}
         ),
     }
 
     try:
-        tags = get_deployment_tag()
+        parser = argparse.ArgumentParser(
+            prog='deployment.py',
+            description='Continuous Deployment helper')
+        parser.add_argument('-v', '--verify', help='Verify the tags are valid, if present', action='store_true')
+        parser.add_argument('-t', '--tag', help='Use the specified tag value instead of the head git tag starting with deploy-')
+
+        args = parser.parse_args()
+        tags = args.tag
+        if tags is None:
+            tags = get_deployment_tag()
+        else:
+            tags = tags.split(' ')
 
         if tags != None:
             print("Found deployment tags:", tags)
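A practical consequence of the argparse wiring above: the script no longer needs a freshly pushed git tag to be exercised. Judging from the flags defined in this hunk, an invocation like `python3 deployment.py --verify --tag "deploy:core partition:1"` supplies the tag text directly via `--tag` (split on spaces into individual tag messages), while `--verify` stops after printing the deployment plan instead of building and deploying.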
@@ -280,7 +312,7 @@ if __name__ == '__main__':
 
             print("\nDeployment Plan:")
             print("Services to build:", plan.services_to_build)
             print("Instances to hold:", plan.instances_to_hold)
+            print("Instances to deploy:", [container.name for container in plan.instances_to_deploy])
 
             if not args.verify:
                 print("\nExecution Plan:")