1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

2 Commits

Author SHA1 Message Date
Viktor Lofgren
acf4bef98d (assistant) Improve search suggestions
Improve suggestions by loading a secondary suggestions set with link text data.
2025-04-24 13:10:59 +02:00
Viktor Lofgren
2a737c34bb (search) Improve suggestions UX
Fix the highlight colors when arrowing through search suggestions.  Also fix the suggestions box for dark mode.
2025-04-24 12:34:05 +02:00
7 changed files with 74 additions and 37 deletions

View File

@@ -1,10 +1,16 @@
This is a bit of a hack!
This class exists to let tailwind know we're using these classes even though they aren't visible in the code,
as we sometimes generate classes from Java code!
as we sometimes generate classes from Java code or javascript!
<i class="text-blue-800 bg-blue-50 dark:text-blue-200 dark:bg-blue-950"></i>
<i class="text-green-800 bg-green-50 dark:text-green-200 dark:bg-green-950"></i>
<i class="text-purple-800 bg-purple-50 dark:text-purple-200 dark:bg-purple-950"></i>
<i class="text-blue-950 bg-gray-100 dark:text-blue-50 dark:bg-gray-900"></i>
<span class="hover:bg-gray-300 "></span>
<label class="suggestion group block relative">
<input type="radio" name="suggestion" class="peer hidden" checked>
<div class="px-4 py-2 cursor-pointer dark:peer-checked:bg-gray-700 dark:hover:bg-gray-700 peer-checked:bg-gray-300 hover:bg-gray-300 w-full">
</div>
</label>

View File

@@ -27,7 +27,7 @@
id="searchInput" />
@endif
<div id="searchSuggestions" class="text-sm absolute top-2 mt-10 w-96 bg-white dark:bg-black border dark:border-gray-600 border-gray-200 rounded-lg shadow-lg hidden"></div>
<div aria-hidden="true" id="searchSuggestions" class="text-sm absolute top-3 mt-10 w-96 bg-white dark:bg-black border dark:border-gray-600 border-gray-300 rounded-lg shadow-lg hidden"></div>
<button class="px-4 py-2 bg-margeblue text-white ml-2 rounded whitespace-nowrap active:text-slate-200">
<i class="fas fa-search text-sm sm:mr-3"></i>

View File

@@ -43,12 +43,12 @@ function displaySuggestions(suggestions) {
}
suggestionsContainer.innerHTML = suggestions.map((suggestion, index) => `
<div
class="suggestion px-4 py-2 cursor-pointer hover:bg-gray-300 ${index === selectedIndex ? 'bg-blue-50' : ''}"
data-index="${index}"
>
<label class="suggestion group block relative">
<input type="radio" name="suggestion" class="peer hidden" ${index === selectedIndex ? 'checked' : ''}>
<div class="px-4 py-2 cursor-pointer dark:peer-checked:bg-gray-700 dark:hover:bg-gray-700 peer-checked:bg-gray-300 hover:bg-gray-300 w-full" data-index="${index}">
${suggestion}
</div>
</label>
`).join('');
suggestionsContainer.classList.remove('hidden');

View File

@@ -10,7 +10,8 @@ import static com.google.inject.name.Names.named;
public class AssistantModule extends AbstractModule {
public void configure() {
bind(Path.class).annotatedWith(named("suggestions-file")).toInstance(WmsaHome.getHomePath().resolve("data/suggestions2.txt.gz"));
bind(Path.class).annotatedWith(named("suggestions-file1")).toInstance(WmsaHome.getHomePath().resolve("data/suggestions2.txt.gz"));
bind(Path.class).annotatedWith(named("suggestions-file2")).toInstance(WmsaHome.getHomePath().resolve("data/suggestions3.txt.gz"));
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
}

View File

@@ -1,6 +1,7 @@
package nu.marginalia.assistant.suggest;
import gnu.trove.list.array.TIntArrayList;
import org.jetbrains.annotations.NotNull;
import java.util.*;
@@ -434,7 +435,7 @@ public class PrefixSearchStructure {
/**
* Class representing a suggested completion.
*/
public static class ScoredSuggestion {
public static class ScoredSuggestion implements Comparable<ScoredSuggestion> {
private final String word;
private final int score;
@@ -455,5 +456,10 @@ public class PrefixSearchStructure {
public String toString() {
return word + " (" + score + ")";
}
/**
 * Compares two suggestions by their integer score, in ascending order:
 * a suggestion with a lower score sorts before one with a higher score.
 * Only the score takes part in the comparison; the word is ignored, so
 * two suggestions with different words but equal scores compare as 0.
 *
 * @param o the suggestion to compare against; must not be null
 * @return a negative value, zero, or a positive value when this score is
 *         less than, equal to, or greater than the score of {@code o}
 */
@Override
public int compareTo(@NotNull PrefixSearchStructure.ScoredSuggestion o) {
// Integer.compare avoids the overflow risk of subtracting the two scores
return Integer.compare(this.score, o.score);
}
}
}

View File

@@ -2,8 +2,6 @@ package nu.marginalia.assistant.suggest;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import nu.marginalia.functions.math.dict.SpellChecker;
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -13,35 +11,27 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Pattern;
import java.util.*;
import java.util.zip.GZIPInputStream;
public class Suggestions {
private PrefixSearchStructure searchStructure = null;
private TermFrequencyDict termFrequencyDict = null;
private volatile boolean ready = false;
private final SpellChecker spellChecker;
List<PrefixSearchStructure> searchStructures = new ArrayList<>();
private volatile boolean ready = false;
private static final Pattern suggestionPattern = Pattern.compile("^[a-zA-Z0-9]+( [a-zA-Z0-9]+)*$");
private static final Logger logger = LoggerFactory.getLogger(Suggestions.class);
private static final int MIN_SUGGEST_LENGTH = 3;
@Inject
public Suggestions(@Named("suggestions-file") Path suggestionsFile,
SpellChecker spellChecker,
TermFrequencyDict dict
public Suggestions(@Named("suggestions-file1") Path suggestionsFile1,
@Named("suggestions-file2") Path suggestionsFile2
) {
this.spellChecker = spellChecker;
Thread.ofPlatform().start(() -> {
searchStructure = loadSuggestions(suggestionsFile);
termFrequencyDict = dict;
searchStructures.add(loadSuggestions(suggestionsFile1));
searchStructures.add(loadSuggestions(suggestionsFile2));
ready = true;
logger.info("Loaded {} suggestions", searchStructure.size());
logger.info("Loaded suggestions");
});
}
@@ -55,8 +45,8 @@ public class Suggestions {
try (var scanner = new Scanner(new GZIPInputStream(new BufferedInputStream(Files.newInputStream(file, StandardOpenOption.READ))))) {
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
String[] parts = StringUtils.split(line, " ", 2);
String line = scanner.nextLine().trim();
String[] parts = StringUtils.split(line, " ,", 2);
if (parts.length != 2) {
logger.warn("Invalid suggestion line: {}", line);
continue;
@@ -64,9 +54,26 @@ public class Suggestions {
int cnt = Integer.parseInt(parts[0]);
if (cnt > 1) {
String word = parts[1];
// Remove quotes and trailing periods if this is a CSV
if (word.startsWith("\"") && word.endsWith("\"")) {
word = word.substring(1, word.length() - 1);
}
// Remove trailing periods
while (word.endsWith(".")) {
word = word.substring(0, word.length() - 1);
}
// Remove junk items we may have gotten from link extraction
if (word.startsWith("click here"))
continue;
if (word.length() > 3) {
ret.insert(word, cnt);
}
}
}
return ret;
}
catch (IOException ex) {
@@ -96,11 +103,23 @@ public class Suggestions {
return List.of();
}
var results = searchStructure.getTopCompletions(prefix, count);
List<PrefixSearchStructure.ScoredSuggestion> resultsAll = new ArrayList<>();
for (var searchStructure : searchStructures) {
resultsAll.addAll(searchStructure.getTopCompletions(prefix, count));
}
resultsAll.sort(Comparator.reverseOrder());
List<String> ret = new ArrayList<>(count);
for (var result : results) {
Set<String> seen = new HashSet<>();
for (var result : resultsAll) {
if (seen.add(result.getWord())) {
ret.add(result.getWord());
}
if (ret.size() >= count) {
break;
}
}
return ret;
}

View File

@@ -64,6 +64,11 @@ public class ControlMain extends MainClass {
download(suggestionsFile, new URI("https://downloads.marginalia.nu/data/suggestions2.txt.gz"));
}
Path altSuggestionsFile = dataPath.resolve("suggestions3.txt.gz");
if (!Files.exists(altSuggestionsFile)) {
download(altSuggestionsFile, new URI("https://downloads.marginalia.nu/data/suggestions3.txt.gz"));
}
Path asnRawData = dataPath.resolve("asn-data-raw-table");
if (!Files.exists(asnRawData)) {
download(asnRawData, new URI("https://thyme.apnic.net/current/data-raw-table"));