(fix)(headless) Fix concurrent read/write search-trie issue.
Some checks failed
supersonic CentOS CI / build (21) (push) Has been cancelled
supersonic mac CI / build (21) (push) Has been cancelled
supersonic ubuntu CI / build (21) (push) Has been cancelled
supersonic windows CI / build (21) (push) Has been cancelled

This commit is contained in:
jerryjzhang
2026-03-03 18:03:55 +08:00
parent 6fe0ebcb9d
commit 18ce934bba
3 changed files with 134 additions and 42 deletions

View File

@@ -8,43 +8,107 @@ import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@Service
@Slf4j
public class KnowledgeBaseService {

    // Alias cache keyed by dimension ID. ConcurrentHashMap makes single-step
    // reads/writes safe; the multi-step read-dedup-append sequence is
    // additionally serialized inside addDimValueAlias.
    private static final Map<Long, List<DictWord>> dimValueAliasMap = new ConcurrentHashMap<>();

    // Guards the search tries: rebuilds take the write lock, searches take the
    // read lock, so a reload cannot interleave with an in-flight search.
    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();

    /**
     * Get dimension value alias map (read-only).
     *
     * @return unmodifiable view of the map; note the contained lists themselves
     *         are still mutable — callers must not modify them
     */
    public static Map<Long, List<DictWord>> getDimValueAlias() {
        return Collections.unmodifiableMap(dimValueAliasMap);
    }
/**
* Add dimension value aliases with deduplication. Thread-safe implementation using
* ConcurrentHashMap.
*
* @param dimId dimension ID
* @param newWords new words to add
* @return updated list of aliases for the dimension
*/
public static List<DictWord> addDimValueAlias(Long dimId, List<DictWord> newWords) { public static List<DictWord> addDimValueAlias(Long dimId, List<DictWord> newWords) {
List<DictWord> dimValueAlias = if (dimId == null || CollectionUtils.isEmpty(newWords)) {
dimValueAliasMap.containsKey(dimId) ? dimValueAliasMap.get(dimId) return dimValueAliasMap.get(dimId);
: new ArrayList<>(); }
Set<String> wordSet =
dimValueAlias // Use computeIfAbsent and synchronized block for thread safety
.stream().map(word -> String.format("%s_%s_%s", synchronized (dimValueAliasMap) {
word.getNatureWithFrequency(), word.getWord(), word.getAlias())) List<DictWord> dimValueAlias =
.collect(Collectors.toSet()); dimValueAliasMap.computeIfAbsent(dimId, k -> new ArrayList<>());
for (DictWord dictWord : newWords) {
String key = String.format("%s_%s_%s", dictWord.getNatureWithFrequency(), // Build deduplication key set
dictWord.getWord(), dictWord.getAlias()); Set<String> existingKeys = dimValueAlias.stream().map(word -> buildDedupKey(word))
if (!wordSet.contains(key)) { .collect(Collectors.toSet());
dimValueAlias.add(dictWord);
} // Add new words with deduplication
for (DictWord dictWord : newWords) {
String key = buildDedupKey(dictWord);
if (!existingKeys.contains(key)) {
dimValueAlias.add(dictWord);
existingKeys.add(key);
}
}
return dimValueAlias;
} }
dimValueAliasMap.put(dimId, dimValueAlias);
return dimValueAlias;
} }
public void updateSemanticKnowledge(List<DictWord> natures) { /**
* Remove dimension value aliases by dimension ID.
*
* @param dimId dimension ID to remove, or null to clear all
*/
public static void removeDimValueAlias(Long dimId) {
if (dimId == null) {
dimValueAliasMap.clear();
log.info("Cleared all dimension value aliases");
} else {
dimValueAliasMap.remove(dimId);
log.info("Removed dimension value alias for dimId: {}", dimId);
}
}
/**
* Build deduplication key for DictWord.
*
* @param word the DictWord object
* @return deduplication key string
*/
private static String buildDedupKey(DictWord word) {
return String.format("%s_%s_%s", word.getNatureWithFrequency(), word.getWord(),
word.getAlias());
}
/**
* Update semantic knowledge (incremental add, no clearing). Use this method to add new words
* without removing existing data.
*
* @param natures the words to add
*/
public void updateSemanticKnowledge(List<DictWord> natures) {
lock.writeLock().lock();
try {
updateSemanticKnowledgeInternal(natures);
} finally {
lock.writeLock().unlock();
}
}
private void updateSemanticKnowledgeInternal(List<DictWord> natures) {
List<DictWord> prefixes = natures.stream().filter( List<DictWord> prefixes = natures.stream().filter(
entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType())) entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
.collect(Collectors.toList()); .collect(Collectors.toList());
@@ -60,52 +124,82 @@ public class KnowledgeBaseService {
SearchService.loadSuffix(suffixes); SearchService.loadSuffix(suffixes);
} }
/**
* Reload all knowledge (full replacement with clearing). Use this method to rebuild the entire
* knowledge base.
*
* @param natures all words to load
*/
public void reloadAllData(List<DictWord> natures) { public void reloadAllData(List<DictWord> natures) {
// 1. reload custom knowledge // 1. reload custom knowledge (executed outside lock to avoid long blocking)
try { try {
HanlpHelper.reloadCustomDictionary(); HanlpHelper.reloadCustomDictionary();
} catch (Exception e) { } catch (Exception e) {
log.error("reloadCustomDictionary error", e); log.error("reloadCustomDictionary error", e);
} }
// 2. update online knowledge // 2. acquire write lock, clear trie and rebuild (short operation)
if (CollectionUtils.isNotEmpty(dimValueAliasMap)) { lock.writeLock().lock();
for (Long dimId : dimValueAliasMap.keySet()) {
natures.addAll(dimValueAliasMap.get(dimId));
}
}
updateOnlineKnowledge(natures);
}
private void updateOnlineKnowledge(List<DictWord> natures) {
try { try {
updateSemanticKnowledge(natures); SearchService.clear();
} catch (Exception e) {
log.error("updateSemanticKnowledge error", e); if (CollectionUtils.isNotEmpty(dimValueAliasMap)) {
for (Long dimId : dimValueAliasMap.keySet()) {
natures.addAll(dimValueAliasMap.get(dimId));
}
}
updateSemanticKnowledgeInternal(natures);
} finally {
lock.writeLock().unlock();
} }
} }
public List<S2Term> getTerms(String text, Map<Long, List<Long>> modelIdToDataSetIds) { public List<S2Term> getTerms(String text, Map<Long, List<Long>> modelIdToDataSetIds) {
return HanlpHelper.getTerms(text, modelIdToDataSetIds); lock.readLock().lock();
try {
return HanlpHelper.getTerms(text, modelIdToDataSetIds);
} finally {
lock.readLock().unlock();
}
} }
public List<HanlpMapResult> prefixSearch(String key, int limit, public List<HanlpMapResult> prefixSearch(String key, int limit,
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) { Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
return prefixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds); lock.readLock().lock();
try {
return prefixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
} finally {
lock.readLock().unlock();
}
} }
public List<HanlpMapResult> prefixSearchByModel(String key, int limit, public List<HanlpMapResult> prefixSearchByModel(String key, int limit,
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) { Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
return SearchService.prefixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds); lock.readLock().lock();
try {
return SearchService.prefixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
} finally {
lock.readLock().unlock();
}
} }
public List<HanlpMapResult> suffixSearch(String key, int limit, public List<HanlpMapResult> suffixSearch(String key, int limit,
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) { Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
return suffixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds); lock.readLock().lock();
try {
return suffixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
} finally {
lock.readLock().unlock();
}
} }
public List<HanlpMapResult> suffixSearchByModel(String key, int limit, public List<HanlpMapResult> suffixSearchByModel(String key, int limit,
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) { Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
return SearchService.suffixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds); lock.readLock().lock();
try {
return SearchService.suffixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
} finally {
lock.readLock().unlock();
}
} }
} }

View File

@@ -31,8 +31,8 @@ import java.util.stream.Collectors;
public class SearchService { public class SearchService {
public static final int SEARCH_SIZE = 200; public static final int SEARCH_SIZE = 200;
private static BinTrie<List<String>> trie; private static volatile BinTrie<List<String>> trie;
private static BinTrie<List<String>> suffixTrie; private static volatile BinTrie<List<String>> suffixTrie;
static { static {
trie = new BinTrie<>(); trie = new BinTrie<>();

View File

@@ -100,8 +100,6 @@ public class HanlpHelper {
FileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath); FileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath);
FileHelper.resetCustomPath(getDynamicCustomDictionary()); FileHelper.resetCustomPath(getDynamicCustomDictionary());
} }
// 3.clear trie
SearchService.clear();
boolean reload = getDynamicCustomDictionary().reload(); boolean reload = getDynamicCustomDictionary().reload();
if (reload) { if (reload) {