Review notes (free text ahead of the first "diff --git" header; not part of any hunk).
NOTE(review): line breaks, indentation and generic type arguments were missing from the
  flattened copy of this patch and are reconstructed here. The arguments on getTerms, the
  *Search* methods and BinTrie could not be derived from usage - confirm them (and the
  indentation of context/removed lines) against the repository before applying.
NOTE(review): added lines were changed, so the post-image blob id on the first "index" line
  is stale.
Changes to the added code:
  - addDimValueAlias: a null dimId no longer reaches ConcurrentHashMap.get (which throws
    NullPointerException on null keys), and the method never returns null.
  - reloadAllData: alias lists are copied under the monitor that addDimValueAlias mutates
    them under, and the catch-and-log of the removed updateOnlineKnowledge is kept.

diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/KnowledgeBaseService.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/KnowledgeBaseService.java
index 7cf018360..4090c705b 100644
--- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/KnowledgeBaseService.java
+++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/KnowledgeBaseService.java
@@ -8,43 +8,113 @@ import lombok.extern.slf4j.Slf4j;
 import org.springframework.stereotype.Service;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.stream.Collectors;
 
 @Service
 @Slf4j
 public class KnowledgeBaseService {
 
-    private static volatile Map<Long, List<DictWord>> dimValueAliasMap = new HashMap<>();
+    private static final Map<Long, List<DictWord>> dimValueAliasMap = new ConcurrentHashMap<>();
+    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
 
+    /**
+     * Get dimension value alias map (read-only).
+     *
+     * @return unmodifiable view of the map; the contained lists are the live internal lists
+     *         and must not be modified by callers
+     */
     public static Map<Long, List<DictWord>> getDimValueAlias() {
-        return dimValueAliasMap;
+        return Collections.unmodifiableMap(dimValueAliasMap);
     }
 
+    /**
+     * Add dimension value aliases with deduplication. Updates to a per-dimension list are
+     * serialized on the alias map's monitor; the map itself is a ConcurrentHashMap.
+     *
+     * @param dimId dimension ID; when null nothing is stored and an empty list is returned
+     * @param newWords new words to add; may be null or empty, in which case nothing is added
+     * @return the alias list registered for the dimension, never null
+     */
     public static List<DictWord> addDimValueAlias(Long dimId, List<DictWord> newWords) {
-        List<DictWord> dimValueAlias =
-                dimValueAliasMap.containsKey(dimId) ? dimValueAliasMap.get(dimId)
-                        : new ArrayList<>();
-        Set<String> wordSet =
-                dimValueAlias
-                        .stream().map(word -> String.format("%s_%s_%s",
-                                word.getNatureWithFrequency(), word.getWord(), word.getAlias()))
-                        .collect(Collectors.toSet());
-        for (DictWord dictWord : newWords) {
-            String key = String.format("%s_%s_%s", dictWord.getNatureWithFrequency(),
-                    dictWord.getWord(), dictWord.getAlias());
-            if (!wordSet.contains(key)) {
-                dimValueAlias.add(dictWord);
-            }
+        // ConcurrentHashMap throws NullPointerException on a null key, so stop before the map.
+        if (dimId == null) {
+            return new ArrayList<>();
+        }
+
+        // The monitor guards the per-dimension ArrayList, which is not thread-safe by itself
+        synchronized (dimValueAliasMap) {
+            List<DictWord> dimValueAlias =
+                    dimValueAliasMap.computeIfAbsent(dimId, k -> new ArrayList<>());
+            if (CollectionUtils.isEmpty(newWords)) {
+                return dimValueAlias;
+            }
+
+            // Build deduplication key set
+            Set<String> existingKeys = dimValueAlias.stream().map(word -> buildDedupKey(word))
+                    .collect(Collectors.toSet());
+
+            // Add new words with deduplication
+            for (DictWord dictWord : newWords) {
+                String key = buildDedupKey(dictWord);
+                if (!existingKeys.contains(key)) {
+                    dimValueAlias.add(dictWord);
+                    existingKeys.add(key);
+                }
+            }
+
+            return dimValueAlias;
         }
-        dimValueAliasMap.put(dimId, dimValueAlias);
-        return dimValueAlias;
     }
 
-    public void updateSemanticKnowledge(List<DictWord> natures) {
+    /**
+     * Remove dimension value aliases by dimension ID.
+     *
+     * @param dimId dimension ID to remove, or null to clear all
+     */
+    public static void removeDimValueAlias(Long dimId) {
+        if (dimId == null) {
+            dimValueAliasMap.clear();
+            log.info("Cleared all dimension value aliases");
+        } else {
+            dimValueAliasMap.remove(dimId);
+            log.info("Removed dimension value alias for dimId: {}", dimId);
+        }
+    }
+
+    /**
+     * Build deduplication key for DictWord.
+     *
+     * @param word the DictWord object
+     * @return deduplication key string
+     */
+    private static String buildDedupKey(DictWord word) {
+        return String.format("%s_%s_%s", word.getNatureWithFrequency(), word.getWord(),
+                word.getAlias());
+    }
+
+    /**
+     * Update semantic knowledge (incremental add, no clearing). Use this method to add new words
+     * without removing existing data.
+     *
+     * @param natures the words to add
+     */
+    public void updateSemanticKnowledge(List<DictWord> natures) {
+        lock.writeLock().lock();
+        try {
+            updateSemanticKnowledgeInternal(natures);
+        } finally {
+            lock.writeLock().unlock();
+        }
+    }
+
+    private void updateSemanticKnowledgeInternal(List<DictWord> natures) {
         List<DictWord> prefixes = natures.stream().filter(
                 entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
                 .collect(Collectors.toList());
@@ -60,52 +130,87 @@ public class KnowledgeBaseService {
         SearchService.loadSuffix(suffixes);
     }
 
+    /**
+     * Reload all knowledge (full replacement with clearing). Use this method to rebuild the entire
+     * knowledge base. Registered dimension value aliases are appended to {@code natures}, and a
+     * failure while rebuilding is logged rather than rethrown.
+     *
+     * @param natures all words to load; must be modifiable
+     */
     public void reloadAllData(List<DictWord> natures) {
-        // 1. reload custom knowledge
+        // 1. reload custom knowledge (executed outside lock to avoid long blocking)
         try {
             HanlpHelper.reloadCustomDictionary();
         } catch (Exception e) {
             log.error("reloadCustomDictionary error", e);
         }
 
-        // 2. update online knowledge
-        if (CollectionUtils.isNotEmpty(dimValueAliasMap)) {
-            for (Long dimId : dimValueAliasMap.keySet()) {
-                natures.addAll(dimValueAliasMap.get(dimId));
-            }
-        }
-        updateOnlineKnowledge(natures);
-    }
-
-    private void updateOnlineKnowledge(List<DictWord> natures) {
+        // 2. acquire write lock, clear trie and rebuild (short operation)
+        lock.writeLock().lock();
         try {
-            updateSemanticKnowledge(natures);
+            SearchService.clear();
+
+            // addDimValueAlias mutates the per-dimension lists under this monitor, so copy
+            // them under the same monitor instead of reading a list mid-update.
+            synchronized (dimValueAliasMap) {
+                for (List<DictWord> aliases : dimValueAliasMap.values()) {
+                    natures.addAll(aliases);
+                }
+            }
+            updateSemanticKnowledgeInternal(natures);
         } catch (Exception e) {
             log.error("updateSemanticKnowledge error", e);
+        } finally {
+            lock.writeLock().unlock();
         }
     }
 
     public List<S2Term> getTerms(String text, Map<Long, List<Long>> modelIdToDataSetIds) {
-        return HanlpHelper.getTerms(text, modelIdToDataSetIds);
+        lock.readLock().lock();
+        try {
+            return HanlpHelper.getTerms(text, modelIdToDataSetIds);
+        } finally {
+            lock.readLock().unlock();
+        }
     }
 
     public List<HanlpMapResult> prefixSearch(String key, int limit,
             Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
-        return prefixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
+        lock.readLock().lock();
+        try {
+            return prefixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
+        } finally {
+            lock.readLock().unlock();
+        }
     }
 
     public List<HanlpMapResult> prefixSearchByModel(String key, int limit,
             Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
-        return SearchService.prefixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
+        lock.readLock().lock();
+        try {
+            return SearchService.prefixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
+        } finally {
+            lock.readLock().unlock();
+        }
     }
 
     public List<HanlpMapResult> suffixSearch(String key, int limit,
             Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
-        return suffixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
+        lock.readLock().lock();
+        try {
+            return suffixSearchByModel(key, limit, modelIdToDataSetIds, detectDataSetIds);
+        } finally {
+            lock.readLock().unlock();
+        }
     }
 
     public List<HanlpMapResult> suffixSearchByModel(String key, int limit,
             Map<Long, List<Long>> modelIdToDataSetIds, Set<Long> detectDataSetIds) {
-        return SearchService.suffixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
+        lock.readLock().lock();
+        try {
+            return SearchService.suffixSearch(key, limit, modelIdToDataSetIds, detectDataSetIds);
+        } finally {
+            lock.readLock().unlock();
+        }
     }
 }
diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/SearchService.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/SearchService.java
index 8c14ffa53..247444e0c 100644
--- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/SearchService.java
+++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/SearchService.java
@@ -31,8 +31,8 @@ import java.util.stream.Collectors;
 public class SearchService {
 
     public static final int SEARCH_SIZE = 200;
-    private static BinTrie<List<String>> trie;
-    private static BinTrie<List<String>> suffixTrie;
+    private static volatile BinTrie<List<String>> trie;
+    private static volatile BinTrie<List<String>> suffixTrie;
 
     static {
         trie = new BinTrie<>();
diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/helper/HanlpHelper.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/helper/HanlpHelper.java
index 4e6730a6b..05502f4ea 100644
--- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/helper/HanlpHelper.java
+++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/knowledge/helper/HanlpHelper.java
@@ -100,8 +100,6 @@ public class HanlpHelper {
             FileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath);
             FileHelper.resetCustomPath(getDynamicCustomDictionary());
         }
-        // 3.clear trie
-        SearchService.clear();
 
         boolean reload = getDynamicCustomDictionary().reload();
         if (reload) {