mirror of
https://github.com/tencentmusic/supersonic.git
synced 2026-04-17 03:34:18 +08:00
(fix)(headless)Fix concurrent read/write search trie issue.
This commit is contained in:
@@ -8,43 +8,107 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class KnowledgeBaseService {
|
||||
private static volatile Map<Long, List<DictWord>> dimValueAliasMap = new HashMap<>();
|
||||
private static final Map<Long, List<DictWord>> dimValueAliasMap = new ConcurrentHashMap<>();
|
||||
private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
|
||||
|
||||
/**
|
||||
* Get dimension value alias map (read-only).
|
||||
*
|
||||
* @return unmodifiable view of the map
|
||||
*/
|
||||
public static Map<Long, List<DictWord>> getDimValueAlias() {
|
||||
return dimValueAliasMap;
|
||||
return Collections.unmodifiableMap(dimValueAliasMap);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add dimension value aliases with deduplication. Thread-safe implementation using
|
||||
* ConcurrentHashMap.
|
||||
*
|
||||
* @param dimId dimension ID
|
||||
* @param newWords new words to add
|
||||
* @return updated list of aliases for the dimension
|
||||
*/
|
||||
public static List<DictWord> addDimValueAlias(Long dimId, List<DictWord> newWords) {
|
||||
List<DictWord> dimValueAlias =
|
||||
dimValueAliasMap.containsKey(dimId) ? dimValueAliasMap.get(dimId)
|
||||
: new ArrayList<>();
|
||||
Set<String> wordSet =
|
||||
dimValueAlias
|
||||
.stream().map(word -> String.format("%s_%s_%s",
|
||||
word.getNatureWithFrequency(), word.getWord(), word.getAlias()))
|
||||
.collect(Collectors.toSet());
|
||||
for (DictWord dictWord : newWords) {
|
||||
String key = String.format("%s_%s_%s", dictWord.getNatureWithFrequency(),
|
||||
dictWord.getWord(), dictWord.getAlias());
|
||||
if (!wordSet.contains(key)) {
|
||||
dimValueAlias.add(dictWord);
|
||||
}
|
||||
if (dimId == null || CollectionUtils.isEmpty(newWords)) {
|
||||
return dimValueAliasMap.get(dimId);
|
||||
}
|
||||
|
||||
// Use computeIfAbsent and synchronized block for thread safety
|
||||
synchronized (dimValueAliasMap) {
|
||||
List<DictWord> dimValueAlias =
|
||||
dimValueAliasMap.computeIfAbsent(dimId, k -> new ArrayList<>());
|
||||
|
||||
// Build deduplication key set
|
||||
Set<String> existingKeys = dimValueAlias.stream().map(word -> buildDedupKey(word))
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
// Add new words with deduplication
|
||||
for (DictWord dictWord : newWords) {
|
||||
String key = buildDedupKey(dictWord);
|
||||
if (!existingKeys.contains(key)) {
|
||||
dimValueAlias.add(dictWord);
|
||||
existingKeys.add(key);
|
||||
}
|
||||
}
|
||||
|
||||
return dimValueAlias;
|
||||
}
|
||||
dimValueAliasMap.put(dimId, dimValueAlias);
|
||||
return dimValueAlias;
|
||||
}
|
||||
|
||||
public void updateSemanticKnowledge(List<DictWord> natures) {
|
||||
/**
|
||||
* Remove dimension value aliases by dimension ID.
|
||||
*
|
||||
* @param dimId dimension ID to remove, or null to clear all
|
||||
*/
|
||||
public static void removeDimValueAlias(Long dimId) {
|
||||
if (dimId == null) {
|
||||
dimValueAliasMap.clear();
|
||||
log.info("Cleared all dimension value aliases");
|
||||
} else {
|
||||
dimValueAliasMap.remove(dimId);
|
||||
log.info("Removed dimension value alias for dimId: {}", dimId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build deduplication key for DictWord.
|
||||
*
|
||||
* @param word the DictWord object
|
||||
* @return deduplication key string
|
||||
*/
|
||||
private static String buildDedupKey(DictWord word) {
|
||||
return String.format("%s_%s_%s", word.getNatureWithFrequency(), word.getWord(),
|
||||
word.getAlias());
|
||||
}
|
||||
|
||||
/**
|
||||
* Update semantic knowledge (incremental add, no clearing). Use this method to add new words
|
||||
* without removing existing data.
|
||||
*
|
||||
* @param natures the words to add
|
||||
*/
|
||||
public void updateSemanticKnowledge(List<DictWord> natures) {
|
||||
lock.writeLock().lock();
|
||||
try {
|
||||
updateSemanticKnowledgeInternal(natures);
|
||||
} finally {
|
||||
lock.writeLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
private void updateSemanticKnowledgeInternal(List<DictWord> natures) {
|
||||
List<DictWord> prefixes = natures.stream().filter(
|
||||
entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
|
||||
.collect(Collectors.toList());
|
||||
@@ -60,52 +124,82 @@ public class KnowledgeBaseService {
|
||||
SearchService.loadSuffix(suffixes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reload all knowledge (full replacement with clearing). Use this method to rebuild the entire
|
||||
* knowledge base.
|
||||
*
|
||||
* @param natures all words to load
|
||||
*/
|
||||
public void reloadAllData(List<DictWord> natures) {
|
||||
// 1. reload custom knowledge
|
||||
// 1. reload custom knowledge (executed outside lock to avoid long blocking)
|
||||
try {
|
||||
HanlpHelper.reloadCustomDictionary();
|
||||
} catch (Exception e) {
|
||||
log.error("reloadCustomDictionary error", e);
|
||||
}
|
||||
|
||||
// 2. update online knowledge
|
||||
if (CollectionUtils.isNotEmpty(dimValueAliasMap)) {
|
||||
for (Long dimId : dimValueAliasMap.keySet()) {
|
||||
natures.addAll(dimValueAliasMap.get(dimId));
|
||||
}
|
||||
}
|
||||
updateOnlineKnowledge(natures);
|
||||
}
|
||||
|
||||
private void updateOnlineKnowledge(List<DictWord> natures) {
|
||||
// 2. acquire write lock, clear trie and rebuild (short operation)
|
||||
lock.writeLock().lock();
|
||||
try {
|
||||
updateSemanticKnowledge(natures);
|
||||
} catch (Exception e) {
|
||||
log.error("updateSemanticKnowledge error", e);
|
||||
SearchService.clear();
|
||||
|
||||
if (CollectionUtils.isNotEmpty(dimValueAliasMap)) {
|
||||
for (Long dimId : dimValueAliasMap.keySet()) {
|
||||
natures.addAll(dimValueAliasMap.get(dimId));
|
||||
}
|
||||
}
|
||||
updateSemanticKnowledgeInternal(natures);
|
||||
} finally {
|
||||
lock.writeLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public List<S2Term> getTerms(String text, Map<Long, List<Long>> modelIdToDataSetIds) {
|
||||
return HanlpHelper.getTerms(text, modelIdToDataSetIds);
|
||||
lock.readLock().lock();
|
||||
try {
|
||||
return HanlpHelper.getTerms(text, modelIdToDataSetIds);
|
||||
} finally {
|
||||
lock.readLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public List<HanlpMapResult> prefixSearch(String key, int limit,
|
||||
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
||||
return prefixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||
lock.readLock().lock();
|
||||
try {
|
||||
return prefixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||
} finally {
|
||||
lock.readLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public List<HanlpMapResult> prefixSearchByModel(String key, int limit,
|
||||
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
||||
return SearchService.prefixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||
lock.readLock().lock();
|
||||
try {
|
||||
return SearchService.prefixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||
} finally {
|
||||
lock.readLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public List<HanlpMapResult> suffixSearch(String key, int limit,
|
||||
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
||||
return suffixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||
lock.readLock().lock();
|
||||
try {
|
||||
return suffixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||
} finally {
|
||||
lock.readLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public List<HanlpMapResult> suffixSearchByModel(String key, int limit,
|
||||
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
||||
return SearchService.suffixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||
lock.readLock().lock();
|
||||
try {
|
||||
return SearchService.suffixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||
} finally {
|
||||
lock.readLock().unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,8 +31,8 @@ import java.util.stream.Collectors;
|
||||
public class SearchService {
|
||||
|
||||
public static final int SEARCH_SIZE = 200;
|
||||
private static BinTrie<List<String>> trie;
|
||||
private static BinTrie<List<String>> suffixTrie;
|
||||
private static volatile BinTrie<List<String>> trie;
|
||||
private static volatile BinTrie<List<String>> suffixTrie;
|
||||
|
||||
static {
|
||||
trie = new BinTrie<>();
|
||||
|
||||
@@ -100,8 +100,6 @@ public class HanlpHelper {
|
||||
FileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath);
|
||||
FileHelper.resetCustomPath(getDynamicCustomDictionary());
|
||||
}
|
||||
// 3.clear trie
|
||||
SearchService.clear();
|
||||
|
||||
boolean reload = getDynamicCustomDictionary().reload();
|
||||
if (reload) {
|
||||
|
||||
Reference in New Issue
Block a user