mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
4 Commits
deploy-020
...
deploy-021
Author | SHA1 | Date | |
---|---|---|---|
|
10b6a25c63 | ||
|
5a8e286689 | ||
|
39a055aa94 | ||
|
37aaa90dc9 |
16
ROADMAP.md
16
ROADMAP.md
@@ -38,14 +38,6 @@ associated with each language added, at least a models file or two, as well as s
|
||||
|
||||
It would be very helpful to find a speaker of a large language other than English to help with the fine-tuning.
|
||||
|
||||
## Support for binary formats like PDF
|
||||
|
||||
The crawler needs to be modified to retain them, and the conversion logic needs to parse them.
|
||||
The documents database probably should have some sort of flag indicating it's a PDF as well.
|
||||
|
||||
PDF parsing is known to be a bit of a security liability so some thought needs to be put in
|
||||
that direction as well.
|
||||
|
||||
## Custom ranking logic
|
||||
|
||||
Stract does an interesting thing where they have configurable search filters.
|
||||
@@ -66,6 +58,14 @@ One of the search engine's biggest limitations right now is that it does not ind
|
||||
|
||||
# Completed
|
||||
|
||||
## Support for binary formats like PDF (COMPLETED 2025-05)
|
||||
|
||||
The crawler needs to be modified to retain them, and the conversion logic needs to parse them.
|
||||
The documents database probably should have some sort of flag indicating it's a PDF as well.
|
||||
|
||||
PDF parsing is known to be a bit of a security liability so some thought needs to be put in
|
||||
that direction as well.
|
||||
|
||||
## Web Design Overhaul (COMPLETED 2025-01)
|
||||
|
||||
The design is somewhat clunky, hard to maintain, and needlessly outdated-looking.
|
||||
|
@@ -108,7 +108,7 @@ public class NsfwDomainFilter {
|
||||
.followRedirects(HttpClient.Redirect.ALWAYS)
|
||||
.build();
|
||||
var stmt = conn.createStatement();
|
||||
var insertStmt = conn.prepareStatement("INSERT INTO NSFW_DOMAINS_TMP (ID, TIER) SELECT ID, ? FROM EC_DOMAIN WHERE DOMAIN_NAME = ?")) {
|
||||
var insertStmt = conn.prepareStatement("INSERT IGNORE INTO NSFW_DOMAINS_TMP (ID, TIER) SELECT ID, ? FROM EC_DOMAIN WHERE DOMAIN_NAME = ?")) {
|
||||
|
||||
stmt.execute("DROP TABLE IF EXISTS NSFW_DOMAINS_TMP");
|
||||
stmt.execute("CREATE TABLE NSFW_DOMAINS_TMP LIKE NSFW_DOMAINS");
|
||||
|
@@ -5,9 +5,6 @@ import subprocess, os
|
||||
from typing import List, Set, Dict, Optional
|
||||
import argparse
|
||||
|
||||
build_dir = "/app/search.marginalia.nu/build"
|
||||
docker_dir = "/app/search.marginalia.nu/docker"
|
||||
|
||||
@dataclass
|
||||
class ServiceConfig:
|
||||
"""Configuration for a service"""
|
||||
@@ -17,6 +14,99 @@ class ServiceConfig:
|
||||
deploy_tier: int
|
||||
groups: Set[str]
|
||||
|
||||
# Define the service configurations
|
||||
|
||||
build_dir = "/app/search.marginalia.nu/build"
|
||||
docker_dir = "/app/search.marginalia.nu/docker"
|
||||
|
||||
SERVICE_CONFIG = {
|
||||
'search': ServiceConfig(
|
||||
gradle_target=':code:services-application:search-service:docker',
|
||||
docker_name='search-service',
|
||||
instances=2,
|
||||
deploy_tier=2,
|
||||
groups={"all", "frontend", "core"}
|
||||
),
|
||||
'search-legacy': ServiceConfig(
|
||||
gradle_target=':code:services-application:search-service-legacy:docker',
|
||||
docker_name='search-service-legacy',
|
||||
instances=None,
|
||||
deploy_tier=3,
|
||||
groups={"all", "frontend", "core"}
|
||||
),
|
||||
'api': ServiceConfig(
|
||||
gradle_target=':code:services-application:api-service:docker',
|
||||
docker_name='api-service',
|
||||
instances=2,
|
||||
deploy_tier=1,
|
||||
groups={"all", "core"}
|
||||
),
|
||||
'browserless': ServiceConfig(
|
||||
gradle_target=':code:tools:browserless:docker',
|
||||
docker_name='browserless',
|
||||
instances=None,
|
||||
deploy_tier=2,
|
||||
groups={"all", "core"}
|
||||
),
|
||||
'assistant': ServiceConfig(
|
||||
gradle_target=':code:services-core:assistant-service:docker',
|
||||
docker_name='assistant-service',
|
||||
instances=2,
|
||||
deploy_tier=2,
|
||||
groups={"all", "core"}
|
||||
),
|
||||
'explorer': ServiceConfig(
|
||||
gradle_target=':code:services-application:explorer-service:docker',
|
||||
docker_name='explorer-service',
|
||||
instances=None,
|
||||
deploy_tier=1,
|
||||
groups={"all", "extra"}
|
||||
),
|
||||
'dating': ServiceConfig(
|
||||
gradle_target=':code:services-application:dating-service:docker',
|
||||
docker_name='dating-service',
|
||||
instances=None,
|
||||
deploy_tier=1,
|
||||
groups={"all", "extra"}
|
||||
),
|
||||
'index': ServiceConfig(
|
||||
gradle_target=':code:services-core:index-service:docker',
|
||||
docker_name='index-service',
|
||||
instances=10,
|
||||
deploy_tier=3,
|
||||
groups={"all", "index"}
|
||||
),
|
||||
'executor': ServiceConfig(
|
||||
gradle_target=':code:services-core:executor-service:docker',
|
||||
docker_name='executor-service',
|
||||
instances=10,
|
||||
deploy_tier=3,
|
||||
groups={"all", "executor"}
|
||||
),
|
||||
'control': ServiceConfig(
|
||||
gradle_target=':code:services-core:control-service:docker',
|
||||
docker_name='control-service',
|
||||
instances=None,
|
||||
deploy_tier=0,
|
||||
groups={"all", "core"}
|
||||
),
|
||||
'status': ServiceConfig(
|
||||
gradle_target=':code:services-application:status-service:docker',
|
||||
docker_name='status-service',
|
||||
instances=None,
|
||||
deploy_tier=4,
|
||||
groups={"all"}
|
||||
),
|
||||
'query': ServiceConfig(
|
||||
gradle_target=':code:services-core:query-service:docker',
|
||||
docker_name='query-service',
|
||||
instances=2,
|
||||
deploy_tier=2,
|
||||
groups={"all", "query"}
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class DeploymentPlan:
|
||||
services_to_build: List[str]
|
||||
@@ -76,7 +166,7 @@ def parse_deployment_tags(
|
||||
instances_to_hold = set()
|
||||
|
||||
available_services = set(service_config.keys())
|
||||
available_groups = set()
|
||||
available_groups = set.union(*[service.groups for service in service_config.values()])
|
||||
|
||||
partitions = set()
|
||||
|
||||
@@ -89,7 +179,6 @@ def parse_deployment_tags(
|
||||
partitions.add(int(p))
|
||||
if tag.startswith('deploy:'):
|
||||
parts = tag[7:].strip().split(',')
|
||||
|
||||
for part in parts:
|
||||
part = part.strip()
|
||||
|
||||
@@ -250,92 +339,7 @@ def add_tags(tags: str) -> None:
|
||||
# Example usage:
|
||||
if __name__ == '__main__':
|
||||
# Define service configuration
|
||||
SERVICE_CONFIG = {
|
||||
'search': ServiceConfig(
|
||||
gradle_target=':code:services-application:search-service:docker',
|
||||
docker_name='search-service',
|
||||
instances=2,
|
||||
deploy_tier=2,
|
||||
groups={"all", "frontend", "core"}
|
||||
),
|
||||
'search-legacy': ServiceConfig(
|
||||
gradle_target=':code:services-application:search-service-legacy:docker',
|
||||
docker_name='search-service-legacy',
|
||||
instances=None,
|
||||
deploy_tier=3,
|
||||
groups={"all", "frontend", "core"}
|
||||
),
|
||||
'api': ServiceConfig(
|
||||
gradle_target=':code:services-application:api-service:docker',
|
||||
docker_name='api-service',
|
||||
instances=2,
|
||||
deploy_tier=1,
|
||||
groups={"all", "core"}
|
||||
),
|
||||
'browserless': ServiceConfig(
|
||||
gradle_target=':code:tools:browserless:docker',
|
||||
docker_name='browserless',
|
||||
instances=None,
|
||||
deploy_tier=2,
|
||||
groups={"all", "core"}
|
||||
),
|
||||
'assistant': ServiceConfig(
|
||||
gradle_target=':code:services-core:assistant-service:docker',
|
||||
docker_name='assistant-service',
|
||||
instances=2,
|
||||
deploy_tier=2,
|
||||
groups={"all", "core"}
|
||||
),
|
||||
'explorer': ServiceConfig(
|
||||
gradle_target=':code:services-application:explorer-service:docker',
|
||||
docker_name='explorer-service',
|
||||
instances=None,
|
||||
deploy_tier=1,
|
||||
groups={"all", "extra"}
|
||||
),
|
||||
'dating': ServiceConfig(
|
||||
gradle_target=':code:services-application:dating-service:docker',
|
||||
docker_name='dating-service',
|
||||
instances=None,
|
||||
deploy_tier=1,
|
||||
groups={"all", "extra"}
|
||||
),
|
||||
'index': ServiceConfig(
|
||||
gradle_target=':code:services-core:index-service:docker',
|
||||
docker_name='index-service',
|
||||
instances=10,
|
||||
deploy_tier=3,
|
||||
groups={"all", "index"}
|
||||
),
|
||||
'executor': ServiceConfig(
|
||||
gradle_target=':code:services-core:executor-service:docker',
|
||||
docker_name='executor-service',
|
||||
instances=10,
|
||||
deploy_tier=3,
|
||||
groups={"all", "executor"}
|
||||
),
|
||||
'control': ServiceConfig(
|
||||
gradle_target=':code:services-core:control-service:docker',
|
||||
docker_name='control-service',
|
||||
instances=None,
|
||||
deploy_tier=0,
|
||||
groups={"all", "core"}
|
||||
),
|
||||
'status': ServiceConfig(
|
||||
gradle_target=':code:services-application:status-service:docker',
|
||||
docker_name='status-service',
|
||||
instances=None,
|
||||
deploy_tier=4,
|
||||
groups={"all"}
|
||||
),
|
||||
'query': ServiceConfig(
|
||||
gradle_target=':code:services-core:query-service:docker',
|
||||
docker_name='query-service',
|
||||
instances=2,
|
||||
deploy_tier=2,
|
||||
groups={"all", "query"}
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
try:
|
||||
parser = argparse.ArgumentParser(
|
||||
|
Reference in New Issue
Block a user