mirror of
https://github.com/tencentmusic/supersonic.git
synced 2026-04-25 17:24:27 +08:00
(fix)(headless)Fix concurrent read/write search trie issue.
This commit is contained in:
@@ -8,43 +8,107 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class KnowledgeBaseService {
|
public class KnowledgeBaseService {
|
||||||
private static volatile Map<Long, List<DictWord>> dimValueAliasMap = new HashMap<>();
|
private static final Map<Long, List<DictWord>> dimValueAliasMap = new ConcurrentHashMap<>();
|
||||||
|
private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get dimension value alias map (read-only).
|
||||||
|
*
|
||||||
|
* @return unmodifiable view of the map
|
||||||
|
*/
|
||||||
public static Map<Long, List<DictWord>> getDimValueAlias() {
|
public static Map<Long, List<DictWord>> getDimValueAlias() {
|
||||||
return dimValueAliasMap;
|
return Collections.unmodifiableMap(dimValueAliasMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add dimension value aliases with deduplication. Thread-safe implementation using
|
||||||
|
* ConcurrentHashMap.
|
||||||
|
*
|
||||||
|
* @param dimId dimension ID
|
||||||
|
* @param newWords new words to add
|
||||||
|
* @return updated list of aliases for the dimension
|
||||||
|
*/
|
||||||
public static List<DictWord> addDimValueAlias(Long dimId, List<DictWord> newWords) {
|
public static List<DictWord> addDimValueAlias(Long dimId, List<DictWord> newWords) {
|
||||||
List<DictWord> dimValueAlias =
|
if (dimId == null || CollectionUtils.isEmpty(newWords)) {
|
||||||
dimValueAliasMap.containsKey(dimId) ? dimValueAliasMap.get(dimId)
|
return dimValueAliasMap.get(dimId);
|
||||||
: new ArrayList<>();
|
}
|
||||||
Set<String> wordSet =
|
|
||||||
dimValueAlias
|
// Use computeIfAbsent and synchronized block for thread safety
|
||||||
.stream().map(word -> String.format("%s_%s_%s",
|
synchronized (dimValueAliasMap) {
|
||||||
word.getNatureWithFrequency(), word.getWord(), word.getAlias()))
|
List<DictWord> dimValueAlias =
|
||||||
.collect(Collectors.toSet());
|
dimValueAliasMap.computeIfAbsent(dimId, k -> new ArrayList<>());
|
||||||
for (DictWord dictWord : newWords) {
|
|
||||||
String key = String.format("%s_%s_%s", dictWord.getNatureWithFrequency(),
|
// Build deduplication key set
|
||||||
dictWord.getWord(), dictWord.getAlias());
|
Set<String> existingKeys = dimValueAlias.stream().map(word -> buildDedupKey(word))
|
||||||
if (!wordSet.contains(key)) {
|
.collect(Collectors.toSet());
|
||||||
dimValueAlias.add(dictWord);
|
|
||||||
}
|
// Add new words with deduplication
|
||||||
|
for (DictWord dictWord : newWords) {
|
||||||
|
String key = buildDedupKey(dictWord);
|
||||||
|
if (!existingKeys.contains(key)) {
|
||||||
|
dimValueAlias.add(dictWord);
|
||||||
|
existingKeys.add(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return dimValueAlias;
|
||||||
}
|
}
|
||||||
dimValueAliasMap.put(dimId, dimValueAlias);
|
|
||||||
return dimValueAlias;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void updateSemanticKnowledge(List<DictWord> natures) {
|
/**
|
||||||
|
* Remove dimension value aliases by dimension ID.
|
||||||
|
*
|
||||||
|
* @param dimId dimension ID to remove, or null to clear all
|
||||||
|
*/
|
||||||
|
public static void removeDimValueAlias(Long dimId) {
|
||||||
|
if (dimId == null) {
|
||||||
|
dimValueAliasMap.clear();
|
||||||
|
log.info("Cleared all dimension value aliases");
|
||||||
|
} else {
|
||||||
|
dimValueAliasMap.remove(dimId);
|
||||||
|
log.info("Removed dimension value alias for dimId: {}", dimId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build deduplication key for DictWord.
|
||||||
|
*
|
||||||
|
* @param word the DictWord object
|
||||||
|
* @return deduplication key string
|
||||||
|
*/
|
||||||
|
private static String buildDedupKey(DictWord word) {
|
||||||
|
return String.format("%s_%s_%s", word.getNatureWithFrequency(), word.getWord(),
|
||||||
|
word.getAlias());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update semantic knowledge (incremental add, no clearing). Use this method to add new words
|
||||||
|
* without removing existing data.
|
||||||
|
*
|
||||||
|
* @param natures the words to add
|
||||||
|
*/
|
||||||
|
public void updateSemanticKnowledge(List<DictWord> natures) {
|
||||||
|
lock.writeLock().lock();
|
||||||
|
try {
|
||||||
|
updateSemanticKnowledgeInternal(natures);
|
||||||
|
} finally {
|
||||||
|
lock.writeLock().unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateSemanticKnowledgeInternal(List<DictWord> natures) {
|
||||||
List<DictWord> prefixes = natures.stream().filter(
|
List<DictWord> prefixes = natures.stream().filter(
|
||||||
entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
|
entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
@@ -60,52 +124,82 @@ public class KnowledgeBaseService {
|
|||||||
SearchService.loadSuffix(suffixes);
|
SearchService.loadSuffix(suffixes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reload all knowledge (full replacement with clearing). Use this method to rebuild the entire
|
||||||
|
* knowledge base.
|
||||||
|
*
|
||||||
|
* @param natures all words to load
|
||||||
|
*/
|
||||||
public void reloadAllData(List<DictWord> natures) {
|
public void reloadAllData(List<DictWord> natures) {
|
||||||
// 1. reload custom knowledge
|
// 1. reload custom knowledge (executed outside lock to avoid long blocking)
|
||||||
try {
|
try {
|
||||||
HanlpHelper.reloadCustomDictionary();
|
HanlpHelper.reloadCustomDictionary();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("reloadCustomDictionary error", e);
|
log.error("reloadCustomDictionary error", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. update online knowledge
|
// 2. acquire write lock, clear trie and rebuild (short operation)
|
||||||
if (CollectionUtils.isNotEmpty(dimValueAliasMap)) {
|
lock.writeLock().lock();
|
||||||
for (Long dimId : dimValueAliasMap.keySet()) {
|
|
||||||
natures.addAll(dimValueAliasMap.get(dimId));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
updateOnlineKnowledge(natures);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void updateOnlineKnowledge(List<DictWord> natures) {
|
|
||||||
try {
|
try {
|
||||||
updateSemanticKnowledge(natures);
|
SearchService.clear();
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("updateSemanticKnowledge error", e);
|
if (CollectionUtils.isNotEmpty(dimValueAliasMap)) {
|
||||||
|
for (Long dimId : dimValueAliasMap.keySet()) {
|
||||||
|
natures.addAll(dimValueAliasMap.get(dimId));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
updateSemanticKnowledgeInternal(natures);
|
||||||
|
} finally {
|
||||||
|
lock.writeLock().unlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<S2Term> getTerms(String text, Map<Long, List<Long>> modelIdToDataSetIds) {
|
public List<S2Term> getTerms(String text, Map<Long, List<Long>> modelIdToDataSetIds) {
|
||||||
return HanlpHelper.getTerms(text, modelIdToDataSetIds);
|
lock.readLock().lock();
|
||||||
|
try {
|
||||||
|
return HanlpHelper.getTerms(text, modelIdToDataSetIds);
|
||||||
|
} finally {
|
||||||
|
lock.readLock().unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<HanlpMapResult> prefixSearch(String key, int limit,
|
public List<HanlpMapResult> prefixSearch(String key, int limit,
|
||||||
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
||||||
return prefixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
lock.readLock().lock();
|
||||||
|
try {
|
||||||
|
return prefixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||||
|
} finally {
|
||||||
|
lock.readLock().unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<HanlpMapResult> prefixSearchByModel(String key, int limit,
|
public List<HanlpMapResult> prefixSearchByModel(String key, int limit,
|
||||||
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
||||||
return SearchService.prefixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
lock.readLock().lock();
|
||||||
|
try {
|
||||||
|
return SearchService.prefixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||||
|
} finally {
|
||||||
|
lock.readLock().unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<HanlpMapResult> suffixSearch(String key, int limit,
|
public List<HanlpMapResult> suffixSearch(String key, int limit,
|
||||||
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
||||||
return suffixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
lock.readLock().lock();
|
||||||
|
try {
|
||||||
|
return suffixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||||
|
} finally {
|
||||||
|
lock.readLock().unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<HanlpMapResult> suffixSearchByModel(String key, int limit,
|
public List<HanlpMapResult> suffixSearchByModel(String key, int limit,
|
||||||
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
|
||||||
return SearchService.suffixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
lock.readLock().lock();
|
||||||
|
try {
|
||||||
|
return SearchService.suffixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
|
||||||
|
} finally {
|
||||||
|
lock.readLock().unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,8 +31,8 @@ import java.util.stream.Collectors;
|
|||||||
public class SearchService {
|
public class SearchService {
|
||||||
|
|
||||||
public static final int SEARCH_SIZE = 200;
|
public static final int SEARCH_SIZE = 200;
|
||||||
private static BinTrie<List<String>> trie;
|
private static volatile BinTrie<List<String>> trie;
|
||||||
private static BinTrie<List<String>> suffixTrie;
|
private static volatile BinTrie<List<String>> suffixTrie;
|
||||||
|
|
||||||
static {
|
static {
|
||||||
trie = new BinTrie<>();
|
trie = new BinTrie<>();
|
||||||
|
|||||||
@@ -100,8 +100,6 @@ public class HanlpHelper {
|
|||||||
FileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath);
|
FileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath);
|
||||||
FileHelper.resetCustomPath(getDynamicCustomDictionary());
|
FileHelper.resetCustomPath(getDynamicCustomDictionary());
|
||||||
}
|
}
|
||||||
// 3.clear trie
|
|
||||||
SearchService.clear();
|
|
||||||
|
|
||||||
boolean reload = getDynamicCustomDictionary().reload();
|
boolean reload = getDynamicCustomDictionary().reload();
|
||||||
if (reload) {
|
if (reload) {
|
||||||
|
|||||||
Reference in New Issue
Block a user