mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
Compare commits
8 Commits
deploy-014
...
deploy-015
Author | SHA1 | Date | |
---|---|---|---|
|
923d5a7234 | ||
|
58f88749b8 | ||
|
77f727a5ba | ||
|
667cfb53dc | ||
|
fe36d4ed20 | ||
|
acf4bef98d | ||
|
2a737c34bb | ||
|
90a577af82 |
@@ -229,13 +229,15 @@ public class FeedFetcherService {
|
||||
.timeout(Duration.ofSeconds(15))
|
||||
;
|
||||
|
||||
if (ifModifiedSinceDate != null) {
|
||||
// Set the If-Modified-Since or If-None-Match headers if we have them
|
||||
// though since there are certain idiosyncrasies in server implementations,
|
||||
// we avoid setting both at the same time as that may turn a 304 into a 200.
|
||||
if (ifNoneMatchTag != null) {
|
||||
requestBuilder.header("If-None-Match", ifNoneMatchTag);
|
||||
} else if (ifModifiedSinceDate != null) {
|
||||
requestBuilder.header("If-Modified-Since", ifModifiedSinceDate);
|
||||
}
|
||||
|
||||
if (ifNoneMatchTag != null) {
|
||||
requestBuilder.header("If-None-Match", ifNoneMatchTag);
|
||||
}
|
||||
|
||||
HttpRequest getRequest = requestBuilder.build();
|
||||
|
||||
|
@@ -19,11 +19,13 @@ public record ContentTags(String etag, String lastMod) {
|
||||
/** Paints the tags onto the request builder. */
|
||||
public void paint(HttpGet request) {
|
||||
|
||||
// Paint the ETag header if present,
|
||||
// otherwise paint the Last-Modified header
|
||||
// (but not both at the same time due to some servers not liking it)
|
||||
|
||||
if (etag != null) {
|
||||
request.addHeader("If-None-Match", etag);
|
||||
}
|
||||
|
||||
if (lastMod != null) {
|
||||
} else if (lastMod != null) {
|
||||
request.addHeader("If-Modified-Since", lastMod);
|
||||
}
|
||||
}
|
||||
|
@@ -26,4 +26,10 @@
|
||||
|
||||
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
|
||||
|
||||
</head>
|
||||
</head>
|
||||
<noscript>
|
||||
<h1>Users of text-based browsers</h1>
|
||||
<p>Consider using the old interface at <a href="https://old-search.marginalia.nu/">https://old-search.marginalia.nu/</a>,
|
||||
as it uses fewer modern CSS tricks, and should work better than the new UI. It's functionally nearly identical, but just renders it using a different layout.</p>
|
||||
<hr>
|
||||
</noscript>
|
@@ -1,9 +1,16 @@
|
||||
This is a bit of a hack!
|
||||
|
||||
This class exists to let tailwind we're using these classes even though they aren't visible in the code,
|
||||
as we sometimes generate classes from Java code!
|
||||
as we sometimes generate classes from Java code or javascript!
|
||||
|
||||
<i class="text-blue-800 bg-blue-50 dark:text-blue-200 dark:bg-blue-950"></i>
|
||||
<i class="text-green-800 bg-green-50 dark:text-green-200 dark:bg-green-950"></i>
|
||||
<i class="text-purple-800 bg-purple-50 dark:text-purple-200 dark:bg-purple-950"></i>
|
||||
<i class="text-blue-950 bg-gray-100 dark:text-blue-50 dark:bg-gray-900"></i>
|
||||
<span class="hover:bg-gray-300 "></span>
|
||||
|
||||
<label class="suggestion group block relative">
|
||||
<input type="radio" name="suggestion" class="peer hidden" checked>
|
||||
<div class="px-4 py-2 cursor-pointer dark:peer-checked:bg-gray-700 dark:hover:bg-gray-700 peer-checked:bg-gray-300 hover:bg-gray-300 w-full">
|
||||
</div>
|
||||
</label>
|
@@ -27,7 +27,7 @@
|
||||
id="searchInput" />
|
||||
@endif
|
||||
|
||||
<div id="searchSuggestions" class="text-sm absolute top-2 mt-10 w-96 bg-white dark:bg-black border dark:border-gray-600 border-gray-200 rounded-lg shadow-lg hidden"></div>
|
||||
<div aria-hidden="true" id="searchSuggestions" class="text-sm absolute top-3 mt-10 w-96 bg-white dark:bg-black border dark:border-gray-600 border-gray-300 rounded-lg shadow-lg hidden"></div>
|
||||
|
||||
<button class="px-4 py-2 bg-margeblue text-white ml-2 rounded whitespace-nowrap active:text-slate-200">
|
||||
<i class="fas fa-search text-sm sm:mr-3"></i>
|
||||
|
@@ -43,13 +43,13 @@ function displaySuggestions(suggestions) {
|
||||
}
|
||||
|
||||
suggestionsContainer.innerHTML = suggestions.map((suggestion, index) => `
|
||||
<div
|
||||
class="suggestion px-4 py-2 cursor-pointer hover:bg-gray-100 ${index === selectedIndex ? 'bg-blue-50' : ''}"
|
||||
data-index="${index}"
|
||||
>
|
||||
${suggestion}
|
||||
</div>
|
||||
`).join('');
|
||||
<label class="suggestion group block relative">
|
||||
<input type="radio" name="suggestion" class="peer hidden" ${index === selectedIndex ? 'checked' : ''}>
|
||||
<div class="px-4 py-2 cursor-pointer dark:peer-checked:bg-gray-700 dark:hover:bg-gray-700 peer-checked:bg-gray-300 hover:bg-gray-300 w-full" data-index="${index}">
|
||||
${suggestion}
|
||||
</div>
|
||||
</label>
|
||||
`).join('');
|
||||
|
||||
suggestionsContainer.classList.remove('hidden');
|
||||
|
||||
|
@@ -10,7 +10,8 @@ import static com.google.inject.name.Names.named;
|
||||
|
||||
public class AssistantModule extends AbstractModule {
|
||||
public void configure() {
|
||||
bind(Path.class).annotatedWith(named("suggestions-file")).toInstance(WmsaHome.getHomePath().resolve("data/suggestions2.txt.gz"));
|
||||
bind(Path.class).annotatedWith(named("suggestions-file1")).toInstance(WmsaHome.getHomePath().resolve("data/suggestions2.txt.gz"));
|
||||
bind(Path.class).annotatedWith(named("suggestions-file2")).toInstance(WmsaHome.getHomePath().resolve("data/suggestions3.txt.gz"));
|
||||
|
||||
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
|
||||
}
|
||||
|
@@ -1,6 +1,7 @@
|
||||
package nu.marginalia.assistant.suggest;
|
||||
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
@@ -434,7 +435,7 @@ public class PrefixSearchStructure {
|
||||
/**
|
||||
* Class representing a suggested completion.
|
||||
*/
|
||||
public static class ScoredSuggestion {
|
||||
public static class ScoredSuggestion implements Comparable<ScoredSuggestion> {
|
||||
private final String word;
|
||||
private final int score;
|
||||
|
||||
@@ -455,5 +456,10 @@ public class PrefixSearchStructure {
|
||||
public String toString() {
|
||||
return word + " (" + score + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(@NotNull PrefixSearchStructure.ScoredSuggestion o) {
|
||||
return Integer.compare(this.score, o.score);
|
||||
}
|
||||
}
|
||||
}
|
@@ -2,8 +2,6 @@ package nu.marginalia.assistant.suggest;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.name.Named;
|
||||
import nu.marginalia.functions.math.dict.SpellChecker;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@@ -13,35 +11,27 @@ import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Scanner;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.*;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
public class Suggestions {
|
||||
private PrefixSearchStructure searchStructure = null;
|
||||
private TermFrequencyDict termFrequencyDict = null;
|
||||
private volatile boolean ready = false;
|
||||
private final SpellChecker spellChecker;
|
||||
List<PrefixSearchStructure> searchStructures = new ArrayList<>();
|
||||
|
||||
private volatile boolean ready = false;
|
||||
|
||||
private static final Pattern suggestionPattern = Pattern.compile("^[a-zA-Z0-9]+( [a-zA-Z0-9]+)*$");
|
||||
private static final Logger logger = LoggerFactory.getLogger(Suggestions.class);
|
||||
|
||||
private static final int MIN_SUGGEST_LENGTH = 3;
|
||||
@Inject
|
||||
public Suggestions(@Named("suggestions-file") Path suggestionsFile,
|
||||
SpellChecker spellChecker,
|
||||
TermFrequencyDict dict
|
||||
public Suggestions(@Named("suggestions-file1") Path suggestionsFile1,
|
||||
@Named("suggestions-file2") Path suggestionsFile2
|
||||
) {
|
||||
this.spellChecker = spellChecker;
|
||||
|
||||
Thread.ofPlatform().start(() -> {
|
||||
searchStructure = loadSuggestions(suggestionsFile);
|
||||
termFrequencyDict = dict;
|
||||
searchStructures.add(loadSuggestions(suggestionsFile1));
|
||||
searchStructures.add(loadSuggestions(suggestionsFile2));
|
||||
ready = true;
|
||||
logger.info("Loaded {} suggestions", searchStructure.size());
|
||||
logger.info("Loaded suggestions");
|
||||
});
|
||||
}
|
||||
|
||||
@@ -55,8 +45,8 @@ public class Suggestions {
|
||||
|
||||
try (var scanner = new Scanner(new GZIPInputStream(new BufferedInputStream(Files.newInputStream(file, StandardOpenOption.READ))))) {
|
||||
while (scanner.hasNextLine()) {
|
||||
String line = scanner.nextLine();
|
||||
String[] parts = StringUtils.split(line, " ", 2);
|
||||
String line = scanner.nextLine().trim();
|
||||
String[] parts = StringUtils.split(line, " ,", 2);
|
||||
if (parts.length != 2) {
|
||||
logger.warn("Invalid suggestion line: {}", line);
|
||||
continue;
|
||||
@@ -64,7 +54,30 @@ public class Suggestions {
|
||||
int cnt = Integer.parseInt(parts[0]);
|
||||
if (cnt > 1) {
|
||||
String word = parts[1];
|
||||
ret.insert(word, cnt);
|
||||
|
||||
// Remove quotes and trailing periods if this is a CSV
|
||||
if (word.startsWith("\"") && word.endsWith("\"")) {
|
||||
word = word.substring(1, word.length() - 1);
|
||||
}
|
||||
|
||||
// Remove trailing periods
|
||||
while (word.endsWith(".")) {
|
||||
word = word.substring(0, word.length() - 1);
|
||||
}
|
||||
|
||||
// Remove junk items we may have gotten from link extraction
|
||||
if (word.startsWith("click here"))
|
||||
continue;
|
||||
if (word.contains("new window"))
|
||||
continue;
|
||||
if (word.contains("click to"))
|
||||
continue;
|
||||
if (word.startsWith("share "))
|
||||
continue;
|
||||
|
||||
if (word.length() > 3) {
|
||||
ret.insert(word, cnt);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
@@ -96,10 +109,22 @@ public class Suggestions {
|
||||
return List.of();
|
||||
}
|
||||
|
||||
var results = searchStructure.getTopCompletions(prefix, count);
|
||||
List<PrefixSearchStructure.ScoredSuggestion> resultsAll = new ArrayList<>();
|
||||
|
||||
for (var searchStructure : searchStructures) {
|
||||
resultsAll.addAll(searchStructure.getTopCompletions(prefix, count));
|
||||
}
|
||||
resultsAll.sort(Comparator.reverseOrder());
|
||||
List<String> ret = new ArrayList<>(count);
|
||||
for (var result : results) {
|
||||
ret.add(result.getWord());
|
||||
|
||||
Set<String> seen = new HashSet<>();
|
||||
for (var result : resultsAll) {
|
||||
if (seen.add(result.getWord())) {
|
||||
ret.add(result.getWord());
|
||||
}
|
||||
if (ret.size() >= count) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@@ -64,6 +64,11 @@ public class ControlMain extends MainClass {
|
||||
download(suggestionsFile, new URI("https://downloads.marginalia.nu/data/suggestions2.txt.gz"));
|
||||
}
|
||||
|
||||
Path altSuggestionsFile = dataPath.resolve("suggestions3.txt.gz");
|
||||
if (!Files.exists(altSuggestionsFile)) {
|
||||
download(altSuggestionsFile, new URI("https://downloads.marginalia.nu/data/suggestions3.txt.gz"));
|
||||
}
|
||||
|
||||
Path asnRawData = dataPath.resolve("asn-data-raw-table");
|
||||
if (!Files.exists(asnRawData)) {
|
||||
download(asnRawData, new URI("https://thyme.apnic.net/current/data-raw-table"));
|
||||
|
@@ -1,4 +1,6 @@
|
||||
## This is a token file for automatic deployment
|
||||
## This is a token file for triggering automatic deployment when no commit is made.
|
||||
|
||||
2025-01-08: Deploy executor.
|
||||
2025-01-07: Deploy executor.
|
||||
2025-01-07: Deploy executor.
|
||||
2025-04-24: Deploy executor.
|
||||
2025-04-24: Deploy assistant.
|
Reference in New Issue
Block a user