(improvement)(build) Add spotless during the build process. (#1639)

This commit is contained in:
lexluo09
2024-09-07 00:36:17 +08:00
committed by GitHub
parent ee15a88b06
commit 5f59e89eea
986 changed files with 15609 additions and 12706 deletions

View File

@@ -24,12 +24,13 @@ public class LoadRemoveService {
}
List<String> resultList = new ArrayList<>(value);
if (StringUtils.isNotBlank(mapperRemoveNaturePrefix)) {
resultList.removeIf(nature -> {
if (Objects.isNull(nature)) {
return false;
}
return nature.startsWith(mapperRemoveNaturePrefix);
});
resultList.removeIf(
nature -> {
if (Objects.isNull(nature)) {
return false;
}
return nature.startsWith(mapperRemoveNaturePrefix);
});
}
return resultList;
}
@@ -46,5 +47,4 @@ public class LoadRemoveService {
}
return null;
}
}

View File

@@ -20,26 +20,16 @@ import java.util.Set;
@Slf4j
public abstract class BaseNode<V> implements Comparable<BaseNode> {
/**
* 状态数组,方便读取的时候用
*/
/** 状态数组,方便读取的时候用 */
static final Status[] ARRAY_STATUS = Status.values();
/**
* 子节点
*/
/** 子节点 */
protected BaseNode[] child;
/**
* 节点状态
*/
/** 节点状态 */
protected Status status;
/**
* 节点代表的字符
*/
/** 节点代表的字符 */
protected char c;
/**
* 节点代表的值
*/
/** 节点代表的值 */
protected V value;
protected String prefix = null;
@@ -238,25 +228,18 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
}
public enum Status {
/**
* 未指定,用于删除词条
*/
/** 未指定,用于删除词条 */
UNDEFINED_0,
/**
* 不是词语的结尾
*/
/** 不是词语的结尾 */
NOT_WORD_1,
/**
* 是个词语的结尾,并且还可以继续
*/
/** 是个词语的结尾,并且还可以继续 */
WORD_MIDDLE_2,
/**
* 是个词语的结尾,并且没有继续
*/
/** 是个词语的结尾,并且没有继续 */
WORD_END_3,
}
public class TrieEntry extends AbstractMap.SimpleEntry<String, V> implements Comparable<TrieEntry> {
public class TrieEntry extends AbstractMap.SimpleEntry<String, V>
implements Comparable<TrieEntry> {
public TrieEntry(String key, V value) {
super(key, value);
@@ -295,8 +278,9 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
}
}
/***
* walk limit
/**
* * walk limit
*
* @param sb
* @param entrySet
*/
@@ -322,5 +306,4 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
}
}
}
}

View File

@@ -1,6 +1,5 @@
package com.hankcs.hanlp.dictionary;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
import com.hankcs.hanlp.corpus.io.ByteArray;
@@ -8,6 +7,7 @@ import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.utility.Predefine;
import com.hankcs.hanlp.utility.TextUtility;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;
@@ -21,9 +21,7 @@ import java.util.List;
import java.util.TreeMap;
import java.util.stream.Collectors;
/**
* 使用DoubleArrayTrie实现的核心词典
*/
/** 使用DoubleArrayTrie实现的核心词典 */
public class CoreDictionary {
public static DoubleArrayTrie<Attribute> trie = new DoubleArrayTrie<Attribute>();
@@ -36,8 +34,13 @@ public class CoreDictionary {
if (!load(PATH)) {
throw new IllegalArgumentException("核心词典" + PATH + "加载失败");
} else {
Predefine.logger.info(PATH + "加载成功," + trie.size() + "个词条,耗时"
+ (System.currentTimeMillis() - start) + "ms");
Predefine.logger.info(
PATH
+ "加载成功,"
+ trie.size()
+ "个词条,耗时"
+ (System.currentTimeMillis() - start)
+ "ms");
}
}
@@ -75,15 +78,21 @@ public class CoreDictionary {
totalFrequency += attribute.totalFrequency;
}
Predefine.logger.info(
"核心词典读入词条" + map.size() + " 全部频次" + totalFrequency + ",耗时" + (
System.currentTimeMillis() - start)
"核心词典读入词条"
+ map.size()
+ " 全部频次"
+ totalFrequency
+ ",耗时"
+ (System.currentTimeMillis() - start)
+ "ms");
br.close();
trie.build(map);
Predefine.logger.info("核心词典加载成功:" + trie.size() + "个词条,下面将写入缓存……");
try {
DataOutputStream out = new DataOutputStream(
new BufferedOutputStream(IOUtil.newOutputStream(path + Predefine.BIN_EXT)));
DataOutputStream out =
new DataOutputStream(
new BufferedOutputStream(
IOUtil.newOutputStream(path + Predefine.BIN_EXT)));
Collection<Attribute> attributeList = map.values();
out.writeInt(attributeList.size());
for (Attribute attribute : attributeList) {
@@ -202,25 +211,18 @@ public class CoreDictionary {
return trie.get(key) != null;
}
/**
* 核心词典中的词属性
*/
/** 核心词典中的词属性 */
public static class Attribute implements Serializable {
/**
* 词性列表
*/
/** 词性列表 */
public Nature[] nature;
/**
* 词性对应的词频
*/
/** 词性对应的词频 */
public int[] frequency;
public int totalFrequency;
public String[] originals;
public String original = null;
public Attribute(int size) {
nature = new Nature[size];
frequency = new int[size];
@@ -276,8 +278,11 @@ public class CoreDictionary {
}
return attribute;
} catch (Exception e) {
Predefine.logger.warning("使用字符串" + natureWithFrequency + "创建词条属性失败!"
+ TextUtility.exceptionToString(e));
Predefine.logger.warning(
"使用字符串"
+ natureWithFrequency
+ "创建词条属性失败!"
+ TextUtility.exceptionToString(e));
return null;
}
}
@@ -404,7 +409,10 @@ public class CoreDictionary {
if (originals == null || originals.length == 0) {
return null;
}
return Arrays.stream(originals).filter(o -> o != null).distinct().collect(Collectors.toList());
return Arrays.stream(originals)
.filter(o -> o != null)
.distinct()
.collect(Collectors.toList());
}
}
@@ -431,4 +439,3 @@ public class CoreDictionary {
return load(path);
}
}

View File

@@ -1,16 +1,15 @@
package com.hankcs.hanlp.seg;
import com.hankcs.hanlp.algorithm.Viterbi;
import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.dictionary.CoreDictionaryTransformMatrixDictionary;
import com.hankcs.hanlp.dictionary.other.CharType;
import com.hankcs.hanlp.seg.NShort.Path.AtomNode;
import com.hankcs.hanlp.seg.common.Graph;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.seg.common.Vertex;
import com.hankcs.hanlp.seg.common.WordNet;
import com.hankcs.hanlp.utility.TextUtility;
@@ -21,11 +20,9 @@ import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
public abstract class WordBasedSegment extends Segment {
public WordBasedSegment() {
}
public WordBasedSegment() {}
protected static void generateWord(List<Vertex> linkedArray, WordNet wordNetOptimum) {
fixResultByRule(linkedArray);
@@ -50,7 +47,9 @@ public abstract class WordBasedSegment extends Segment {
}
vertex = (Vertex) var1.next();
} while (!vertex.realWord.equals("") && !vertex.realWord.equals("") && !vertex.realWord.equals("-"));
} while (!vertex.realWord.equals("")
&& !vertex.realWord.equals("")
&& !vertex.realWord.equals("-"));
vertex.confirmNature(Nature.w);
}
@@ -64,9 +63,12 @@ public abstract class WordBasedSegment extends Segment {
for (Vertex current = next; listIterator.hasNext(); current = next) {
next = (Vertex) listIterator.next();
Nature currentNature = current.getNature();
if (currentNature == Nature.nx && (next.hasNature(Nature.q) || next.hasNature(Nature.n))) {
if (currentNature == Nature.nx
&& (next.hasNature(Nature.q) || next.hasNature(Nature.n))) {
String[] param = current.realWord.split("-", 1);
if (param.length == 2 && TextUtility.isAllNum(param[0]) && TextUtility.isAllNum(param[1])) {
if (param.length == 2
&& TextUtility.isAllNum(param[0])
&& TextUtility.isAllNum(param[1])) {
current = current.copy();
current.realWord = param[0];
current.confirmNature(Nature.m);
@@ -79,7 +81,6 @@ public abstract class WordBasedSegment extends Segment {
}
}
}
}
}
@@ -90,7 +91,8 @@ public abstract class WordBasedSegment extends Segment {
for (Vertex current = next; listIterator.hasNext(); current = next) {
next = (Vertex) listIterator.next();
if (TextUtility.isAllNum(current.realWord) || TextUtility.isAllChineseNum(current.realWord)) {
if (TextUtility.isAllNum(current.realWord)
|| TextUtility.isAllChineseNum(current.realWord)) {
String nextWord = next.realWord;
if (nextWord.length() == 1 && "月日时分秒".contains(nextWord)
|| nextWord.length() == 2 && nextWord.equals("月份")) {
@@ -110,8 +112,10 @@ public abstract class WordBasedSegment extends Segment {
current.confirmNature(Nature.m, true);
} else if (current.realWord.length() > 1) {
char last = current.realWord.charAt(current.realWord.length() - 1);
current = Vertex.newNumberInstance(
current.realWord.substring(0, current.realWord.length() - 1));
current =
Vertex.newNumberInstance(
current.realWord.substring(
0, current.realWord.length() - 1));
listIterator.previous();
listIterator.previous();
listIterator.set(current);
@@ -121,7 +125,6 @@ public abstract class WordBasedSegment extends Segment {
}
}
}
}
}
@@ -143,9 +146,7 @@ public abstract class WordBasedSegment extends Segment {
return wordNet.toGraph();
}
/**
* @deprecated
*/
/** @deprecated */
private static List<AtomNode> atomSegment(String sSentence, int start, int end) {
if (end < start) {
throw new RuntimeException("start=" + start + " < end=" + end);
@@ -161,7 +162,10 @@ public abstract class WordBasedSegment extends Segment {
charTypeArray[i] = CharType.get(c);
if (c == '.' && i < charArray.length - 1 && CharType.get(charArray[i + 1]) == 9) {
charTypeArray[i] = 9;
} else if (c == '.' && i < charArray.length - 1 && charArray[i + 1] >= '0' && charArray[i + 1] <= '9') {
} else if (c == '.'
&& i < charArray.length - 1
&& charArray[i + 1] >= '0'
&& charArray[i + 1] <= '9') {
charTypeArray[i] = 5;
} else if (charTypeArray[i] == 8) {
charTypeArray[i] = 5;
@@ -222,8 +226,10 @@ public abstract class WordBasedSegment extends Segment {
while (true) {
while (listIterator.hasNext()) {
next = (Vertex) listIterator.next();
if (!TextUtility.isAllNum(current.realWord) && !TextUtility.isAllChineseNum(current.realWord)
|| !TextUtility.isAllNum(next.realWord) && !TextUtility.isAllChineseNum(next.realWord)) {
if (!TextUtility.isAllNum(current.realWord)
&& !TextUtility.isAllChineseNum(current.realWord)
|| !TextUtility.isAllNum(next.realWord)
&& !TextUtility.isAllChineseNum(next.realWord)) {
current = next;
} else {
current = Vertex.newNumberInstance(current.realWord + next.realWord);
@@ -246,16 +252,24 @@ public abstract class WordBasedSegment extends Segment {
DoubleArrayTrie.Searcher searcher = CoreDictionary.trie.getSearcher(charArray, 0);
while (searcher.next()) {
wordNetStorage.add(searcher.begin + 1, new Vertex(new String(charArray, searcher.begin, searcher.length),
(CoreDictionary.Attribute) searcher.value, searcher.index));
wordNetStorage.add(
searcher.begin + 1,
new Vertex(
new String(charArray, searcher.begin, searcher.length),
(CoreDictionary.Attribute) searcher.value,
searcher.index));
}
if (this.config.forceCustomDictionary) {
this.customDictionary.parseText(charArray, new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>() {
public void hit(int begin, int end, CoreDictionary.Attribute value) {
wordNetStorage.add(begin + 1, new Vertex(new String(charArray, begin, end - begin), value));
}
});
this.customDictionary.parseText(
charArray,
new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>() {
public void hit(int begin, int end, CoreDictionary.Attribute value) {
wordNetStorage.add(
begin + 1,
new Vertex(new String(charArray, begin, end - begin), value));
}
});
}
LinkedList<Vertex>[] vertexes = wordNetStorage.getVertexes();
@@ -266,9 +280,10 @@ public abstract class WordBasedSegment extends Segment {
if (vertexes[i].isEmpty()) {
int j;
for (j = i + 1;
j < vertexes.length - 1 && (vertexes[j].isEmpty() || CharType.get(charArray[j - 1]) == 11);
++j) {
}
j < vertexes.length - 1
&& (vertexes[j].isEmpty()
|| CharType.get(charArray[j - 1]) == 11);
++j) {}
wordNetStorage.add(i, Segment.quickAtomSegment(charArray, i - 1, j - 1));
i = j;
@@ -291,12 +306,14 @@ public abstract class WordBasedSegment extends Segment {
for (int i = 0; i < length; ++i) {
Vertex vertex = (Vertex) listIterator.next();
Term termMain = Segment.convert(vertex);
//termList.add(termMain);
// termList.add(termMain);
addTerms(termList, vertex, line - 1);
termMain.offset = line - 1;
if (vertex.realWord.length() > 2) {
label43:
for (int currentLine = line; currentLine < line + vertex.realWord.length(); ++currentLine) {
for (int currentLine = line;
currentLine < line + vertex.realWord.length();
++currentLine) {
Iterator iterator = wordNetAll.descendingIterator(currentLine);
while (true) {
@@ -310,11 +327,12 @@ public abstract class WordBasedSegment extends Segment {
&& smallVertex.realWord.length() < this.config.indexMode);
if (smallVertex != vertex
&& currentLine + smallVertex.realWord.length() <= line + vertex.realWord.length()) {
&& currentLine + smallVertex.realWord.length()
<= line + vertex.realWord.length()) {
listIterator.add(smallVertex);
//Term termSub = convert(smallVertex);
//termSub.offset = currentLine - 1;
//termList.add(termSub);
// Term termSub = convert(smallVertex);
// termSub.offset = currentLine - 1;
// termList.add(termSub);
addTerms(termList, smallVertex, currentLine - 1);
}
}
@@ -328,7 +346,8 @@ public abstract class WordBasedSegment extends Segment {
}
protected static void speechTagging(List<Vertex> vertexList) {
Viterbi.compute(vertexList, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
Viterbi.compute(
vertexList, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
}
protected void addTerms(List<Term> terms, Vertex vertex, int offset) {

View File

@@ -1,14 +1,9 @@
package com.hankcs.hanlp.seg.common;
import com.hankcs.hanlp.corpus.tag.Nature;
//import com.hankcs.hanlp.dictionary.CoreDictionary;
//import com.hankcs.hanlp.dictionary.CustomDictionary;
//import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import lombok.Data;
import lombok.ToString;
//import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
@Data
@ToString
public class Term {
@@ -72,5 +67,4 @@ public class Term {
}
return super.equals(obj);
}
}