mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-12 12:37:55 +00:00
[improvement](chat) unformatted hanlp code make seach/query work
This commit is contained in:
@@ -19,7 +19,6 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
|||||||
* 状态数组,方便读取的时候用
|
* 状态数组,方便读取的时候用
|
||||||
*/
|
*/
|
||||||
static final Status[] ARRAY_STATUS = Status.values();
|
static final Status[] ARRAY_STATUS = Status.values();
|
||||||
public String prefix = null;
|
|
||||||
/**
|
/**
|
||||||
* 子节点
|
* 子节点
|
||||||
*/
|
*/
|
||||||
@@ -37,6 +36,8 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
|||||||
*/
|
*/
|
||||||
protected V value;
|
protected V value;
|
||||||
|
|
||||||
|
public String prefix = null;
|
||||||
|
|
||||||
public BaseNode<V> transition(String path, int begin) {
|
public BaseNode<V> transition(String path, int begin) {
|
||||||
BaseNode<V> cur = this;
|
BaseNode<V> cur = this;
|
||||||
for (int i = begin; i < path.length(); ++i) {
|
for (int i = begin; i < path.length(); ++i) {
|
||||||
@@ -230,6 +231,37 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public enum Status {
|
||||||
|
/**
|
||||||
|
* 未指定,用于删除词条
|
||||||
|
*/
|
||||||
|
UNDEFINED_0,
|
||||||
|
/**
|
||||||
|
* 不是词语的结尾
|
||||||
|
*/
|
||||||
|
NOT_WORD_1,
|
||||||
|
/**
|
||||||
|
* 是个词语的结尾,并且还可以继续
|
||||||
|
*/
|
||||||
|
WORD_MIDDLE_2,
|
||||||
|
/**
|
||||||
|
* 是个词语的结尾,并且没有继续
|
||||||
|
*/
|
||||||
|
WORD_END_3,
|
||||||
|
}
|
||||||
|
|
||||||
|
public class TrieEntry extends AbstractMap.SimpleEntry<String, V> implements Comparable<TrieEntry> {
|
||||||
|
|
||||||
|
public TrieEntry(String key, V value) {
|
||||||
|
super(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(TrieEntry o) {
|
||||||
|
return getKey().compareTo(String.valueOf(o.getKey()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "BaseNode{"
|
return "BaseNode{"
|
||||||
@@ -284,35 +316,4 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public enum Status {
|
|
||||||
/**
|
|
||||||
* 未指定,用于删除词条
|
|
||||||
*/
|
|
||||||
UNDEFINED_0,
|
|
||||||
/**
|
|
||||||
* 不是词语的结尾
|
|
||||||
*/
|
|
||||||
NOT_WORD_1,
|
|
||||||
/**
|
|
||||||
* 是个词语的结尾,并且还可以继续
|
|
||||||
*/
|
|
||||||
WORD_MIDDLE_2,
|
|
||||||
/**
|
|
||||||
* 是个词语的结尾,并且没有继续
|
|
||||||
*/
|
|
||||||
WORD_END_3,
|
|
||||||
}
|
|
||||||
|
|
||||||
public class TrieEntry extends AbstractMap.SimpleEntry<String, V> implements Comparable<TrieEntry> {
|
|
||||||
|
|
||||||
public TrieEntry(String key, V value) {
|
|
||||||
super(key, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compareTo(TrieEntry o) {
|
|
||||||
return getKey().compareTo(String.valueOf(o.getKey()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,16 +26,9 @@ import java.util.TreeMap;
|
|||||||
*/
|
*/
|
||||||
public class CoreDictionary {
|
public class CoreDictionary {
|
||||||
|
|
||||||
public static final String PATH = HanLP.Config.CoreDictionaryPath;
|
|
||||||
public static DoubleArrayTrie<Attribute> trie = new DoubleArrayTrie<Attribute>();
|
public static DoubleArrayTrie<Attribute> trie = new DoubleArrayTrie<Attribute>();
|
||||||
// 一些特殊的WORD_ID
|
|
||||||
public static final int NR_WORD_ID = getWordID(Predefine.TAG_PEOPLE);
|
public static final String PATH = HanLP.Config.CoreDictionaryPath;
|
||||||
public static final int NS_WORD_ID = getWordID(Predefine.TAG_PLACE);
|
|
||||||
public static final int NT_WORD_ID = getWordID(Predefine.TAG_GROUP);
|
|
||||||
public static final int T_WORD_ID = getWordID(Predefine.TAG_TIME);
|
|
||||||
public static final int X_WORD_ID = getWordID(Predefine.TAG_CLUSTER);
|
|
||||||
public static final int M_WORD_ID = getWordID(Predefine.TAG_NUMBER);
|
|
||||||
public static final int NX_WORD_ID = getWordID(Predefine.TAG_PROPER);
|
|
||||||
|
|
||||||
// 自动加载词典
|
// 自动加载词典
|
||||||
static {
|
static {
|
||||||
@@ -47,6 +40,15 @@ public class CoreDictionary {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 一些特殊的WORD_ID
|
||||||
|
public static final int NR_WORD_ID = getWordID(Predefine.TAG_PEOPLE);
|
||||||
|
public static final int NS_WORD_ID = getWordID(Predefine.TAG_PLACE);
|
||||||
|
public static final int NT_WORD_ID = getWordID(Predefine.TAG_GROUP);
|
||||||
|
public static final int T_WORD_ID = getWordID(Predefine.TAG_TIME);
|
||||||
|
public static final int X_WORD_ID = getWordID(Predefine.TAG_CLUSTER);
|
||||||
|
public static final int M_WORD_ID = getWordID(Predefine.TAG_NUMBER);
|
||||||
|
public static final int NX_WORD_ID = getWordID(Predefine.TAG_PROPER);
|
||||||
|
|
||||||
private static boolean load(String path) {
|
private static boolean load(String path) {
|
||||||
logger.info("核心词典开始加载:" + path);
|
logger.info("核心词典开始加载:" + path);
|
||||||
if (loadDat(path)) {
|
if (loadDat(path)) {
|
||||||
@@ -198,29 +200,6 @@ public class CoreDictionary {
|
|||||||
return trie.get(key) != null;
|
return trie.get(key) != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 获取词语的ID
|
|
||||||
*
|
|
||||||
* @param a 词语
|
|
||||||
* @return ID, 如果不存在, 则返回-1
|
|
||||||
*/
|
|
||||||
public static int getWordID(String a) {
|
|
||||||
return CoreDictionary.trie.exactMatchSearch(a);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 热更新核心词典<br>
|
|
||||||
* 集群环境(或其他IOAdapter)需要自行删除缓存文件
|
|
||||||
*
|
|
||||||
* @return 是否成功
|
|
||||||
*/
|
|
||||||
public static boolean reload() {
|
|
||||||
String path = CoreDictionary.PATH;
|
|
||||||
IOUtil.deleteFile(path + Predefine.BIN_EXT);
|
|
||||||
|
|
||||||
return load(path);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 核心词典中的词属性
|
* 核心词典中的词属性
|
||||||
*/
|
*/
|
||||||
@@ -387,5 +366,28 @@ public class CoreDictionary {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取词语的ID
|
||||||
|
*
|
||||||
|
* @param a 词语
|
||||||
|
* @return ID, 如果不存在, 则返回-1
|
||||||
|
*/
|
||||||
|
public static int getWordID(String a) {
|
||||||
|
return CoreDictionary.trie.exactMatchSearch(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 热更新核心词典<br>
|
||||||
|
* 集群环境(或其他IOAdapter)需要自行删除缓存文件
|
||||||
|
*
|
||||||
|
* @return 是否成功
|
||||||
|
*/
|
||||||
|
public static boolean reload() {
|
||||||
|
String path = CoreDictionary.PATH;
|
||||||
|
IOUtil.deleteFile(path + Predefine.BIN_EXT);
|
||||||
|
|
||||||
|
return load(path);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -236,10 +236,6 @@ public abstract class WordBasedSegment extends Segment {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static void speechTagging(List<Vertex> vertexList) {
|
|
||||||
Viterbi.compute(vertexList, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void generateWordNet(final WordNet wordNetStorage) {
|
protected void generateWordNet(final WordNet wordNetStorage) {
|
||||||
final char[] charArray = wordNetStorage.charArray;
|
final char[] charArray = wordNetStorage.charArray;
|
||||||
DoubleArrayTrie.Searcher searcher = CoreDictionary.trie.getSearcher(charArray, 0);
|
DoubleArrayTrie.Searcher searcher = CoreDictionary.trie.getSearcher(charArray, 0);
|
||||||
@@ -326,6 +322,10 @@ public abstract class WordBasedSegment extends Segment {
|
|||||||
return termList;
|
return termList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected static void speechTagging(List<Vertex> vertexList) {
|
||||||
|
Viterbi.compute(vertexList, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
|
||||||
|
}
|
||||||
|
|
||||||
protected void addTerms(List<Term> terms, Vertex vertex, int offset) {
|
protected void addTerms(List<Term> terms, Vertex vertex, int offset) {
|
||||||
for (int i = 0; i < vertex.attribute.nature.length; i++) {
|
for (int i = 0; i < vertex.attribute.nature.length; i++) {
|
||||||
Term term = new Term(vertex.realWord, vertex.attribute.nature[i]);
|
Term term = new Term(vertex.realWord, vertex.attribute.nature[i]);
|
||||||
|
|||||||
Reference in New Issue
Block a user