From 75e15f4c5095fe2ddebe40bf088ea1840cc16187 Mon Sep 17 00:00:00 2001
From: lexluo <lexluo@tencent.com>
Date: Sat, 8 Jul 2023 15:50:39 +0800
Subject: [PATCH] [improvement](chat) unformat hanlp code to make search/query
 work

---
 .../collection/trie/bintrie/BaseNode.java     | 65 +++++++++---------
 .../hanlp/dictionary/CoreDictionary.java      | 66 ++++++++++---------
 .../hankcs/hanlp/seg/WordBasedSegment.java    |  8 +--
 3 files changed, 71 insertions(+), 68 deletions(-)
diff --git a/chat/knowledge/src/main/java/com/hankcs/hanlp/collection/trie/bintrie/BaseNode.java b/chat/knowledge/src/main/java/com/hankcs/hanlp/collection/trie/bintrie/BaseNode.java
index 895634c1e..ec978fe09 100644
--- a/chat/knowledge/src/main/java/com/hankcs/hanlp/collection/trie/bintrie/BaseNode.java
+++ b/chat/knowledge/src/main/java/com/hankcs/hanlp/collection/trie/bintrie/BaseNode.java
@@ -19,7 +19,6 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
      * 状态数组，方便读取的时候用
      */
     static final Status[] ARRAY_STATUS = Status.values();
-    public String prefix = null;
     /**
      * 子节点
      */
@@ -37,6 +36,8 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
      */
     protected V value;
 
+    public String prefix = null;
+
     public BaseNode<V> transition(String path, int begin) {
         BaseNode<V> cur = this;
         for (int i = begin; i < path.length(); ++i) {
@@ -230,6 +231,37 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
         }
     }
 
+    public enum Status {
+        /**
+         * Unspecified; used when deleting an entry
+         */
+        UNDEFINED_0,
+        /**
+         * Not the end of a word
+         */
+        NOT_WORD_1,
+        /**
+         * The end of a word, and the path can still continue
+         */
+        WORD_MIDDLE_2,
+        /**
+         * The end of a word, with no continuation
+         */
+        WORD_END_3,
+    }
+
+    public class TrieEntry extends AbstractMap.SimpleEntry<String, V> implements Comparable<TrieEntry> {
+        // String-keyed entry holding a V value; natural ordering compares keys only (see compareTo).
+        public TrieEntry(String key, V value) {
+            super(key, value);
+        }
+
+        @Override
+        public int compareTo(TrieEntry o) {
+            return getKey().compareTo(String.valueOf(o.getKey()));
+        }
+    }
+
     @Override
     public String toString() {
         return "BaseNode{"
@@ -284,35 +316,4 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
         }
     }
 
-    public enum Status {
-        /**
-         * 未指定，用于删除词条
-         */
-        UNDEFINED_0,
-        /**
-         * 不是词语的结尾
-         */
-        NOT_WORD_1,
-        /**
-         * 是个词语的结尾，并且还可以继续
-         */
-        WORD_MIDDLE_2,
-        /**
-         * 是个词语的结尾，并且没有继续
-         */
-        WORD_END_3,
-    }
-
-    public class TrieEntry extends AbstractMap.SimpleEntry<String, V> implements Comparable<TrieEntry> {
-
-        public TrieEntry(String key, V value) {
-            super(key, value);
-        }
-
-        @Override
-        public int compareTo(TrieEntry o) {
-            return getKey().compareTo(String.valueOf(o.getKey()));
-        }
-    }
-
 }
diff --git a/chat/knowledge/src/main/java/com/hankcs/hanlp/dictionary/CoreDictionary.java b/chat/knowledge/src/main/java/com/hankcs/hanlp/dictionary/CoreDictionary.java
index fecc6cb1e..e97c9f903 100644
--- a/chat/knowledge/src/main/java/com/hankcs/hanlp/dictionary/CoreDictionary.java
+++ b/chat/knowledge/src/main/java/com/hankcs/hanlp/dictionary/CoreDictionary.java
@@ -26,16 +26,9 @@ import java.util.TreeMap;
  */
 public class CoreDictionary {
 
-    public static final String PATH = HanLP.Config.CoreDictionaryPath;
     public static DoubleArrayTrie<Attribute> trie = new DoubleArrayTrie<Attribute>();
-    // 一些特殊的WORD_ID
-    public static final int NR_WORD_ID = getWordID(Predefine.TAG_PEOPLE);
-    public static final int NS_WORD_ID = getWordID(Predefine.TAG_PLACE);
-    public static final int NT_WORD_ID = getWordID(Predefine.TAG_GROUP);
-    public static final int T_WORD_ID = getWordID(Predefine.TAG_TIME);
-    public static final int X_WORD_ID = getWordID(Predefine.TAG_CLUSTER);
-    public static final int M_WORD_ID = getWordID(Predefine.TAG_NUMBER);
-    public static final int NX_WORD_ID = getWordID(Predefine.TAG_PROPER);
+
+    public static final String PATH = HanLP.Config.CoreDictionaryPath;
 
     // 自动加载词典
     static {
@@ -47,6 +40,15 @@ public class CoreDictionary {
         }
     }
 
+    // Special WORD_IDs; evaluated at class init via getWordID, so presumably must follow the static loader above.
+    public static final int NR_WORD_ID = getWordID(Predefine.TAG_PEOPLE);
+    public static final int NS_WORD_ID = getWordID(Predefine.TAG_PLACE);
+    public static final int NT_WORD_ID = getWordID(Predefine.TAG_GROUP);
+    public static final int T_WORD_ID = getWordID(Predefine.TAG_TIME);
+    public static final int X_WORD_ID = getWordID(Predefine.TAG_CLUSTER);
+    public static final int M_WORD_ID = getWordID(Predefine.TAG_NUMBER);
+    public static final int NX_WORD_ID = getWordID(Predefine.TAG_PROPER);
+
     private static boolean load(String path) {
         logger.info("核心词典开始加载:" + path);
         if (loadDat(path)) {
@@ -198,29 +200,6 @@ public class CoreDictionary {
         return trie.get(key) != null;
     }
 
-    /**
-     * 获取词语的ID
-     *
-     * @param a 词语
-     * @return ID, 如果不存在, 则返回-1
-     */
-    public static int getWordID(String a) {
-        return CoreDictionary.trie.exactMatchSearch(a);
-    }
-
-    /**
-     * 热更新核心词典<br>
-     * 集群环境（或其他IOAdapter）需要自行删除缓存文件
-     *
-     * @return 是否成功
-     */
-    public static boolean reload() {
-        String path = CoreDictionary.PATH;
-        IOUtil.deleteFile(path + Predefine.BIN_EXT);
-
-        return load(path);
-    }
-
     /**
      * 核心词典中的词属性
      */
@@ -387,5 +366,28 @@ public class CoreDictionary {
             }
         }
     }
+
+    /**
+     * Gets the ID of a word by exact-match lookup in the core dictionary trie.
+     *
+     * @param a the word
+     * @return the ID, or -1 if the word does not exist
+     */
+    public static int getWordID(String a) {
+        return CoreDictionary.trie.exactMatchSearch(a);
+    }
+
+    /**
+     * Hot-reloads the core dictionary<br>
+     * In a cluster environment (or with another IOAdapter) the cache file must be deleted by the caller
+     *
+     * @return whether the reload succeeded
+     */
+    public static boolean reload() {
+        String path = CoreDictionary.PATH;
+        IOUtil.deleteFile(path + Predefine.BIN_EXT);
+
+        return load(path);
+    }
 }
 
diff --git a/chat/knowledge/src/main/java/com/hankcs/hanlp/seg/WordBasedSegment.java b/chat/knowledge/src/main/java/com/hankcs/hanlp/seg/WordBasedSegment.java
index b467abba3..47204ec23 100644
--- a/chat/knowledge/src/main/java/com/hankcs/hanlp/seg/WordBasedSegment.java
+++ b/chat/knowledge/src/main/java/com/hankcs/hanlp/seg/WordBasedSegment.java
@@ -236,10 +236,6 @@ public abstract class WordBasedSegment extends Segment {
         }
     }
 
-    protected static void speechTagging(List<Vertex> vertexList) {
-        Viterbi.compute(vertexList, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
-    }
-
     protected void generateWordNet(final WordNet wordNetStorage) {
         final char[] charArray = wordNetStorage.charArray;
         DoubleArrayTrie.Searcher searcher = CoreDictionary.trie.getSearcher(charArray, 0);
@@ -326,6 +322,10 @@ public abstract class WordBasedSegment extends Segment {
         return termList;
     }
 
+    protected static void speechTagging(List<Vertex> vertexList) {
+        Viterbi.compute(vertexList, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
+    }
+
     protected void addTerms(List<Term> terms, Vertex vertex, int offset) {
         for (int i = 0; i < vertex.attribute.nature.length; i++) {
             Term term = new Term(vertex.realWord, vertex.attribute.nature[i]);