mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 11:07:06 +00:00
[improvement](chat) unformatted hanlp code to make search/query work
This commit is contained in:
@@ -19,7 +19,6 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
* 状态数组,方便读取的时候用
|
||||
*/
|
||||
static final Status[] ARRAY_STATUS = Status.values();
|
||||
public String prefix = null;
|
||||
/**
|
||||
* 子节点
|
||||
*/
|
||||
@@ -37,6 +36,8 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
*/
|
||||
protected V value;
|
||||
|
||||
public String prefix = null;
|
||||
|
||||
public BaseNode<V> transition(String path, int begin) {
|
||||
BaseNode<V> cur = this;
|
||||
for (int i = begin; i < path.length(); ++i) {
|
||||
@@ -230,6 +231,37 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
}
|
||||
}
|
||||
|
||||
public enum Status {
|
||||
/**
|
||||
* 未指定,用于删除词条
|
||||
*/
|
||||
UNDEFINED_0,
|
||||
/**
|
||||
* 不是词语的结尾
|
||||
*/
|
||||
NOT_WORD_1,
|
||||
/**
|
||||
* 是个词语的结尾,并且还可以继续
|
||||
*/
|
||||
WORD_MIDDLE_2,
|
||||
/**
|
||||
* 是个词语的结尾,并且没有继续
|
||||
*/
|
||||
WORD_END_3,
|
||||
}
|
||||
|
||||
public class TrieEntry extends AbstractMap.SimpleEntry<String, V> implements Comparable<TrieEntry> {
|
||||
|
||||
public TrieEntry(String key, V value) {
|
||||
super(key, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(TrieEntry o) {
|
||||
return getKey().compareTo(String.valueOf(o.getKey()));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BaseNode{"
|
||||
@@ -284,35 +316,4 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
}
|
||||
}
|
||||
|
||||
public enum Status {
|
||||
/**
|
||||
* 未指定,用于删除词条
|
||||
*/
|
||||
UNDEFINED_0,
|
||||
/**
|
||||
* 不是词语的结尾
|
||||
*/
|
||||
NOT_WORD_1,
|
||||
/**
|
||||
* 是个词语的结尾,并且还可以继续
|
||||
*/
|
||||
WORD_MIDDLE_2,
|
||||
/**
|
||||
* 是个词语的结尾,并且没有继续
|
||||
*/
|
||||
WORD_END_3,
|
||||
}
|
||||
|
||||
public class TrieEntry extends AbstractMap.SimpleEntry<String, V> implements Comparable<TrieEntry> {
|
||||
|
||||
public TrieEntry(String key, V value) {
|
||||
super(key, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(TrieEntry o) {
|
||||
return getKey().compareTo(String.valueOf(o.getKey()));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -26,16 +26,9 @@ import java.util.TreeMap;
|
||||
*/
|
||||
public class CoreDictionary {
|
||||
|
||||
public static final String PATH = HanLP.Config.CoreDictionaryPath;
|
||||
public static DoubleArrayTrie<Attribute> trie = new DoubleArrayTrie<Attribute>();
|
||||
// 一些特殊的WORD_ID
|
||||
public static final int NR_WORD_ID = getWordID(Predefine.TAG_PEOPLE);
|
||||
public static final int NS_WORD_ID = getWordID(Predefine.TAG_PLACE);
|
||||
public static final int NT_WORD_ID = getWordID(Predefine.TAG_GROUP);
|
||||
public static final int T_WORD_ID = getWordID(Predefine.TAG_TIME);
|
||||
public static final int X_WORD_ID = getWordID(Predefine.TAG_CLUSTER);
|
||||
public static final int M_WORD_ID = getWordID(Predefine.TAG_NUMBER);
|
||||
public static final int NX_WORD_ID = getWordID(Predefine.TAG_PROPER);
|
||||
|
||||
public static final String PATH = HanLP.Config.CoreDictionaryPath;
|
||||
|
||||
// 自动加载词典
|
||||
static {
|
||||
@@ -47,6 +40,15 @@ public class CoreDictionary {
|
||||
}
|
||||
}
|
||||
|
||||
// 一些特殊的WORD_ID
|
||||
public static final int NR_WORD_ID = getWordID(Predefine.TAG_PEOPLE);
|
||||
public static final int NS_WORD_ID = getWordID(Predefine.TAG_PLACE);
|
||||
public static final int NT_WORD_ID = getWordID(Predefine.TAG_GROUP);
|
||||
public static final int T_WORD_ID = getWordID(Predefine.TAG_TIME);
|
||||
public static final int X_WORD_ID = getWordID(Predefine.TAG_CLUSTER);
|
||||
public static final int M_WORD_ID = getWordID(Predefine.TAG_NUMBER);
|
||||
public static final int NX_WORD_ID = getWordID(Predefine.TAG_PROPER);
|
||||
|
||||
private static boolean load(String path) {
|
||||
logger.info("核心词典开始加载:" + path);
|
||||
if (loadDat(path)) {
|
||||
@@ -198,29 +200,6 @@ public class CoreDictionary {
|
||||
return trie.get(key) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词语的ID
|
||||
*
|
||||
* @param a 词语
|
||||
* @return ID, 如果不存在, 则返回-1
|
||||
*/
|
||||
public static int getWordID(String a) {
|
||||
return CoreDictionary.trie.exactMatchSearch(a);
|
||||
}
|
||||
|
||||
/**
|
||||
* 热更新核心词典<br>
|
||||
* 集群环境(或其他IOAdapter)需要自行删除缓存文件
|
||||
*
|
||||
* @return 是否成功
|
||||
*/
|
||||
public static boolean reload() {
|
||||
String path = CoreDictionary.PATH;
|
||||
IOUtil.deleteFile(path + Predefine.BIN_EXT);
|
||||
|
||||
return load(path);
|
||||
}
|
||||
|
||||
/**
|
||||
* 核心词典中的词属性
|
||||
*/
|
||||
@@ -387,5 +366,28 @@ public class CoreDictionary {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取词语的ID
|
||||
*
|
||||
* @param a 词语
|
||||
* @return ID, 如果不存在, 则返回-1
|
||||
*/
|
||||
public static int getWordID(String a) {
|
||||
return CoreDictionary.trie.exactMatchSearch(a);
|
||||
}
|
||||
|
||||
/**
|
||||
* 热更新核心词典<br>
|
||||
* 集群环境(或其他IOAdapter)需要自行删除缓存文件
|
||||
*
|
||||
* @return 是否成功
|
||||
*/
|
||||
public static boolean reload() {
|
||||
String path = CoreDictionary.PATH;
|
||||
IOUtil.deleteFile(path + Predefine.BIN_EXT);
|
||||
|
||||
return load(path);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -236,10 +236,6 @@ public abstract class WordBasedSegment extends Segment {
|
||||
}
|
||||
}
|
||||
|
||||
protected static void speechTagging(List<Vertex> vertexList) {
|
||||
Viterbi.compute(vertexList, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
|
||||
}
|
||||
|
||||
protected void generateWordNet(final WordNet wordNetStorage) {
|
||||
final char[] charArray = wordNetStorage.charArray;
|
||||
DoubleArrayTrie.Searcher searcher = CoreDictionary.trie.getSearcher(charArray, 0);
|
||||
@@ -326,6 +322,10 @@ public abstract class WordBasedSegment extends Segment {
|
||||
return termList;
|
||||
}
|
||||
|
||||
protected static void speechTagging(List<Vertex> vertexList) {
|
||||
Viterbi.compute(vertexList, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
|
||||
}
|
||||
|
||||
protected void addTerms(List<Term> terms, Vertex vertex, int offset) {
|
||||
for (int i = 0; i < vertex.attribute.nature.length; i++) {
|
||||
Term term = new Term(vertex.realWord, vertex.attribute.nature[i]);
|
||||
|
||||
Reference in New Issue
Block a user