mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 17:32:39 +02:00
Compare commits
4 Commits
deploy-020
...
deploy-021
| Author | SHA1 | Date | |
|---|---|---|---|
| | 10b6a25c63 | | |
| | 5a8e286689 | | |
| | 39a055aa94 | | |
| | 37aaa90dc9 | | |
16
ROADMAP.md
16
ROADMAP.md
@@ -38,14 +38,6 @@ associated with each language added, at least a models file or two, as well as s
|
|||||||
|
|
||||||
It would be very helpful to find a speaker of a large language other than English to help in the fine tuning.
|
It would be very helpful to find a speaker of a large language other than English to help in the fine tuning.
|
||||||
|
|
||||||
## Support for binary formats like PDF
|
|
||||||
|
|
||||||
The crawler needs to be modified to retain them, and the conversion logic needs to parse them.
|
|
||||||
The documents database probably should have some sort of flag indicating it's a PDF as well.
|
|
||||||
|
|
||||||
PDF parsing is known to be a bit of a security liability so some thought needs to be put in
|
|
||||||
that direction as well.
|
|
||||||
|
|
||||||
## Custom ranking logic
|
## Custom ranking logic
|
||||||
|
|
||||||
Stract does an interesting thing where they have configurable search filters.
|
Stract does an interesting thing where they have configurable search filters.
|
||||||
@@ -66,6 +58,14 @@ One of the search engine's biggest limitations right now is that it does not ind
|
|||||||
|
|
||||||
# Completed
|
# Completed
|
||||||
|
|
||||||
|
## Support for binary formats like PDF (COMPLETED 2025-05)
|
||||||
|
|
||||||
|
The crawler needs to be modified to retain them, and the conversion logic needs to parse them.
|
||||||
|
The documents database probably should have some sort of flag indicating it's a PDF as well.
|
||||||
|
|
||||||
|
PDF parsing is known to be a bit of a security liability so some thought needs to be put in
|
||||||
|
that direction as well.
|
||||||
|
|
||||||
## Web Design Overhaul (COMPLETED 2025-01)
|
## Web Design Overhaul (COMPLETED 2025-01)
|
||||||
|
|
||||||
The design is kinda clunky and hard to maintain, and needlessly outdated-looking.
|
The design is kinda clunky and hard to maintain, and needlessly outdated-looking.
|
||||||
|
@@ -108,7 +108,7 @@ public class NsfwDomainFilter {
|
|||||||
.followRedirects(HttpClient.Redirect.ALWAYS)
|
.followRedirects(HttpClient.Redirect.ALWAYS)
|
||||||
.build();
|
.build();
|
||||||
var stmt = conn.createStatement();
|
var stmt = conn.createStatement();
|
||||||
var insertStmt = conn.prepareStatement("INSERT INTO NSFW_DOMAINS_TMP (ID, TIER) SELECT ID, ? FROM EC_DOMAIN WHERE DOMAIN_NAME = ?")) {
|
var insertStmt = conn.prepareStatement("INSERT IGNORE INTO NSFW_DOMAINS_TMP (ID, TIER) SELECT ID, ? FROM EC_DOMAIN WHERE DOMAIN_NAME = ?")) {
|
||||||
|
|
||||||
stmt.execute("DROP TABLE IF EXISTS NSFW_DOMAINS_TMP");
|
stmt.execute("DROP TABLE IF EXISTS NSFW_DOMAINS_TMP");
|
||||||
stmt.execute("CREATE TABLE NSFW_DOMAINS_TMP LIKE NSFW_DOMAINS");
|
stmt.execute("CREATE TABLE NSFW_DOMAINS_TMP LIKE NSFW_DOMAINS");
|
||||||
|
@@ -5,9 +5,6 @@ import subprocess, os
|
|||||||
from typing import List, Set, Dict, Optional
|
from typing import List, Set, Dict, Optional
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
build_dir = "/app/search.marginalia.nu/build"
|
|
||||||
docker_dir = "/app/search.marginalia.nu/docker"
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ServiceConfig:
|
class ServiceConfig:
|
||||||
"""Configuration for a service"""
|
"""Configuration for a service"""
|
||||||
@@ -17,6 +14,99 @@ class ServiceConfig:
|
|||||||
deploy_tier: int
|
deploy_tier: int
|
||||||
groups: Set[str]
|
groups: Set[str]
|
||||||
|
|
||||||
|
# Define the service configurations
|
||||||
|
|
||||||
|
build_dir = "/app/search.marginalia.nu/build"
|
||||||
|
docker_dir = "/app/search.marginalia.nu/docker"
|
||||||
|
|
||||||
|
SERVICE_CONFIG = {
|
||||||
|
'search': ServiceConfig(
|
||||||
|
gradle_target=':code:services-application:search-service:docker',
|
||||||
|
docker_name='search-service',
|
||||||
|
instances=2,
|
||||||
|
deploy_tier=2,
|
||||||
|
groups={"all", "frontend", "core"}
|
||||||
|
),
|
||||||
|
'search-legacy': ServiceConfig(
|
||||||
|
gradle_target=':code:services-application:search-service-legacy:docker',
|
||||||
|
docker_name='search-service-legacy',
|
||||||
|
instances=None,
|
||||||
|
deploy_tier=3,
|
||||||
|
groups={"all", "frontend", "core"}
|
||||||
|
),
|
||||||
|
'api': ServiceConfig(
|
||||||
|
gradle_target=':code:services-application:api-service:docker',
|
||||||
|
docker_name='api-service',
|
||||||
|
instances=2,
|
||||||
|
deploy_tier=1,
|
||||||
|
groups={"all", "core"}
|
||||||
|
),
|
||||||
|
'browserless': ServiceConfig(
|
||||||
|
gradle_target=':code:tools:browserless:docker',
|
||||||
|
docker_name='browserless',
|
||||||
|
instances=None,
|
||||||
|
deploy_tier=2,
|
||||||
|
groups={"all", "core"}
|
||||||
|
),
|
||||||
|
'assistant': ServiceConfig(
|
||||||
|
gradle_target=':code:services-core:assistant-service:docker',
|
||||||
|
docker_name='assistant-service',
|
||||||
|
instances=2,
|
||||||
|
deploy_tier=2,
|
||||||
|
groups={"all", "core"}
|
||||||
|
),
|
||||||
|
'explorer': ServiceConfig(
|
||||||
|
gradle_target=':code:services-application:explorer-service:docker',
|
||||||
|
docker_name='explorer-service',
|
||||||
|
instances=None,
|
||||||
|
deploy_tier=1,
|
||||||
|
groups={"all", "extra"}
|
||||||
|
),
|
||||||
|
'dating': ServiceConfig(
|
||||||
|
gradle_target=':code:services-application:dating-service:docker',
|
||||||
|
docker_name='dating-service',
|
||||||
|
instances=None,
|
||||||
|
deploy_tier=1,
|
||||||
|
groups={"all", "extra"}
|
||||||
|
),
|
||||||
|
'index': ServiceConfig(
|
||||||
|
gradle_target=':code:services-core:index-service:docker',
|
||||||
|
docker_name='index-service',
|
||||||
|
instances=10,
|
||||||
|
deploy_tier=3,
|
||||||
|
groups={"all", "index"}
|
||||||
|
),
|
||||||
|
'executor': ServiceConfig(
|
||||||
|
gradle_target=':code:services-core:executor-service:docker',
|
||||||
|
docker_name='executor-service',
|
||||||
|
instances=10,
|
||||||
|
deploy_tier=3,
|
||||||
|
groups={"all", "executor"}
|
||||||
|
),
|
||||||
|
'control': ServiceConfig(
|
||||||
|
gradle_target=':code:services-core:control-service:docker',
|
||||||
|
docker_name='control-service',
|
||||||
|
instances=None,
|
||||||
|
deploy_tier=0,
|
||||||
|
groups={"all", "core"}
|
||||||
|
),
|
||||||
|
'status': ServiceConfig(
|
||||||
|
gradle_target=':code:services-application:status-service:docker',
|
||||||
|
docker_name='status-service',
|
||||||
|
instances=None,
|
||||||
|
deploy_tier=4,
|
||||||
|
groups={"all"}
|
||||||
|
),
|
||||||
|
'query': ServiceConfig(
|
||||||
|
gradle_target=':code:services-core:query-service:docker',
|
||||||
|
docker_name='query-service',
|
||||||
|
instances=2,
|
||||||
|
deploy_tier=2,
|
||||||
|
groups={"all", "query"}
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DeploymentPlan:
|
class DeploymentPlan:
|
||||||
services_to_build: List[str]
|
services_to_build: List[str]
|
||||||
@@ -76,7 +166,7 @@ def parse_deployment_tags(
|
|||||||
instances_to_hold = set()
|
instances_to_hold = set()
|
||||||
|
|
||||||
available_services = set(service_config.keys())
|
available_services = set(service_config.keys())
|
||||||
available_groups = set()
|
available_groups = set.union(*[service.groups for service in service_config.values()])
|
||||||
|
|
||||||
partitions = set()
|
partitions = set()
|
||||||
|
|
||||||
@@ -89,7 +179,6 @@ def parse_deployment_tags(
|
|||||||
partitions.add(int(p))
|
partitions.add(int(p))
|
||||||
if tag.startswith('deploy:'):
|
if tag.startswith('deploy:'):
|
||||||
parts = tag[7:].strip().split(',')
|
parts = tag[7:].strip().split(',')
|
||||||
|
|
||||||
for part in parts:
|
for part in parts:
|
||||||
part = part.strip()
|
part = part.strip()
|
||||||
|
|
||||||
@@ -250,92 +339,7 @@ def add_tags(tags: str) -> None:
|
|||||||
# Example usage:
|
# Example usage:
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# Define service configuration
|
# Define service configuration
|
||||||
SERVICE_CONFIG = {
|
|
||||||
'search': ServiceConfig(
|
|
||||||
gradle_target=':code:services-application:search-service:docker',
|
|
||||||
docker_name='search-service',
|
|
||||||
instances=2,
|
|
||||||
deploy_tier=2,
|
|
||||||
groups={"all", "frontend", "core"}
|
|
||||||
),
|
|
||||||
'search-legacy': ServiceConfig(
|
|
||||||
gradle_target=':code:services-application:search-service-legacy:docker',
|
|
||||||
docker_name='search-service-legacy',
|
|
||||||
instances=None,
|
|
||||||
deploy_tier=3,
|
|
||||||
groups={"all", "frontend", "core"}
|
|
||||||
),
|
|
||||||
'api': ServiceConfig(
|
|
||||||
gradle_target=':code:services-application:api-service:docker',
|
|
||||||
docker_name='api-service',
|
|
||||||
instances=2,
|
|
||||||
deploy_tier=1,
|
|
||||||
groups={"all", "core"}
|
|
||||||
),
|
|
||||||
'browserless': ServiceConfig(
|
|
||||||
gradle_target=':code:tools:browserless:docker',
|
|
||||||
docker_name='browserless',
|
|
||||||
instances=None,
|
|
||||||
deploy_tier=2,
|
|
||||||
groups={"all", "core"}
|
|
||||||
),
|
|
||||||
'assistant': ServiceConfig(
|
|
||||||
gradle_target=':code:services-core:assistant-service:docker',
|
|
||||||
docker_name='assistant-service',
|
|
||||||
instances=2,
|
|
||||||
deploy_tier=2,
|
|
||||||
groups={"all", "core"}
|
|
||||||
),
|
|
||||||
'explorer': ServiceConfig(
|
|
||||||
gradle_target=':code:services-application:explorer-service:docker',
|
|
||||||
docker_name='explorer-service',
|
|
||||||
instances=None,
|
|
||||||
deploy_tier=1,
|
|
||||||
groups={"all", "extra"}
|
|
||||||
),
|
|
||||||
'dating': ServiceConfig(
|
|
||||||
gradle_target=':code:services-application:dating-service:docker',
|
|
||||||
docker_name='dating-service',
|
|
||||||
instances=None,
|
|
||||||
deploy_tier=1,
|
|
||||||
groups={"all", "extra"}
|
|
||||||
),
|
|
||||||
'index': ServiceConfig(
|
|
||||||
gradle_target=':code:services-core:index-service:docker',
|
|
||||||
docker_name='index-service',
|
|
||||||
instances=10,
|
|
||||||
deploy_tier=3,
|
|
||||||
groups={"all", "index"}
|
|
||||||
),
|
|
||||||
'executor': ServiceConfig(
|
|
||||||
gradle_target=':code:services-core:executor-service:docker',
|
|
||||||
docker_name='executor-service',
|
|
||||||
instances=10,
|
|
||||||
deploy_tier=3,
|
|
||||||
groups={"all", "executor"}
|
|
||||||
),
|
|
||||||
'control': ServiceConfig(
|
|
||||||
gradle_target=':code:services-core:control-service:docker',
|
|
||||||
docker_name='control-service',
|
|
||||||
instances=None,
|
|
||||||
deploy_tier=0,
|
|
||||||
groups={"all", "core"}
|
|
||||||
),
|
|
||||||
'status': ServiceConfig(
|
|
||||||
gradle_target=':code:services-application:status-service:docker',
|
|
||||||
docker_name='status-service',
|
|
||||||
instances=None,
|
|
||||||
deploy_tier=4,
|
|
||||||
groups={"all"}
|
|
||||||
),
|
|
||||||
'query': ServiceConfig(
|
|
||||||
gradle_target=':code:services-core:query-service:docker',
|
|
||||||
docker_name='query-service',
|
|
||||||
instances=2,
|
|
||||||
deploy_tier=2,
|
|
||||||
groups={"all", "query"}
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
|
Reference in New Issue
Block a user