mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-11 03:58:14 +00:00
(improvement)(Headless) Filtering based on dataSetIds during Mapper detection, compatible with term (#1096)
Co-authored-by: jolunoluo
This commit is contained in:
@@ -10,7 +10,6 @@ import org.springframework.util.CollectionUtils;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
@Data
|
||||
@Slf4j
|
||||
@@ -19,23 +18,11 @@ public class LoadRemoveService {
|
||||
@Value("${mapper.remove.nature.prefix:}")
|
||||
private String mapperRemoveNaturePrefix;
|
||||
|
||||
public List removeNatures(List value, Set<Long> detectModelIds) {
|
||||
public List removeNatures(List value) {
|
||||
if (CollectionUtils.isEmpty(value)) {
|
||||
return value;
|
||||
}
|
||||
List<String> resultList = new ArrayList<>(value);
|
||||
if (!CollectionUtils.isEmpty(detectModelIds)) {
|
||||
resultList.removeIf(nature -> {
|
||||
if (Objects.isNull(nature)) {
|
||||
return false;
|
||||
}
|
||||
Long modelId = getDataSetId(nature);
|
||||
if (Objects.nonNull(modelId)) {
|
||||
return !detectModelIds.contains(modelId);
|
||||
}
|
||||
return false;
|
||||
});
|
||||
}
|
||||
if (StringUtils.isNotBlank(mapperRemoveNaturePrefix)) {
|
||||
resultList.removeIf(nature -> {
|
||||
if (Objects.isNull(nature)) {
|
||||
|
||||
@@ -2,6 +2,9 @@ package com.hankcs.hanlp.collection.trie.bintrie;
|
||||
|
||||
import com.hankcs.hanlp.LoadRemoveService;
|
||||
import com.hankcs.hanlp.corpus.io.ByteArray;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectInput;
|
||||
@@ -14,8 +17,6 @@ import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
@@ -286,12 +287,12 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
+ '}';
|
||||
}
|
||||
|
||||
public void walkNode(Set<Map.Entry<String, V>> entrySet, Set<Long> detectModelIds) {
|
||||
public void walkNode(Set<Map.Entry<String, V>> entrySet) {
|
||||
if (status == Status.WORD_MIDDLE_2 || status == Status.WORD_END_3) {
|
||||
logger.debug("detectModelIds:{},before:{}", detectModelIds, value.toString());
|
||||
List natures = new LoadRemoveService().removeNatures((List) value, detectModelIds);
|
||||
logger.debug("walkNode before:{}", value.toString());
|
||||
List natures = new LoadRemoveService().removeNatures((List) value);
|
||||
String name = this.prefix != null ? this.prefix + c : "" + c;
|
||||
logger.debug("name:{},after:{},natures:{}", name, (List) value, natures);
|
||||
logger.debug("walkNode name:{},after:{},natures:{}", name, (List) value, natures);
|
||||
entrySet.add(new TrieEntry(name, (V) natures));
|
||||
}
|
||||
}
|
||||
@@ -300,21 +301,17 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
* walk limit
|
||||
* @param sb
|
||||
* @param entrySet
|
||||
* @param limit
|
||||
*/
|
||||
public void walkLimit(StringBuilder sb, Set<Map.Entry<String, V>> entrySet, int limit, Set<Long> detectModelIds) {
|
||||
public void walkLimit(StringBuilder sb, Set<Map.Entry<String, V>> entrySet) {
|
||||
Queue<BaseNode> queue = new ArrayDeque<>();
|
||||
this.prefix = sb.toString();
|
||||
queue.add(this);
|
||||
while (!queue.isEmpty()) {
|
||||
if (entrySet.size() >= limit) {
|
||||
break;
|
||||
}
|
||||
BaseNode root = queue.poll();
|
||||
if (root == null) {
|
||||
continue;
|
||||
}
|
||||
root.walkNode(entrySet, detectModelIds);
|
||||
root.walkNode(entrySet);
|
||||
if (root.child == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user