diff --git a/dict/dict2.txt b/dict/dict2.txt new file mode 100644 index 0000000..1e2e57f --- /dev/null +++ b/dict/dict2.txt @@ -0,0 +1 @@ +刘*上*台 \ No newline at end of file diff --git a/examples/readme/main.go b/examples/readme/main.go index 667cda6..4ccf030 100644 --- a/examples/readme/main.go +++ b/examples/readme/main.go @@ -2,8 +2,7 @@ package main import ( "fmt" - - "github.com/importcjj/sensitive" + "sensitive" ) func main() { diff --git a/examples/simple.go b/examples/simple.go index bc47cd5..543005c 100644 --- a/examples/simple.go +++ b/examples/simple.go @@ -2,8 +2,7 @@ package main import ( "fmt" - - "github.com/importcjj/sensitive" + "sensitive" ) func main() { diff --git a/examples/test_issue_3/main.go b/examples/test_issue_3/main.go index f0e98e3..03c3b43 100644 --- a/examples/test_issue_3/main.go +++ b/examples/test_issue_3/main.go @@ -2,7 +2,7 @@ package main import ( "fmt" - "github.com/importcjj/sensitive" + "sensitive" ) func keywordFilterSearch(content string) (bool, string) { diff --git a/examples/test_issue_4/simple.go b/examples/test_issue_4/simple.go index 045094b..05a825a 100644 --- a/examples/test_issue_4/simple.go +++ b/examples/test_issue_4/simple.go @@ -2,12 +2,14 @@ package main import ( "fmt" - - "github.com/importcjj/sensitive" + "sensitive" ) func main() { filter := sensitive.New() - filter.LoadWordDict("../../dict/dict.txt") - fmt.Println(filter.Replace("xC4x", '*')) + filter.LoadWordDict("../../dict/dict2.txt") + fmt.Println(filter.ValidateWithWildcard("刘一上三台啊", '*')) + fmt.Println(filter.ValidateWithWildcard("哈哈哈刘一上三", '*')) + fmt.Println(filter.ValidateWithWildcard("哈哈哈刘一上三台", '*')) + fmt.Println(filter.ValidateWithWildcard("哈哈哈刘一上三台,你是个小白鼠", '*')) } diff --git a/filter.go b/filter.go index fed8b0e..a3d57e7 100644 --- a/filter.go +++ b/filter.go @@ -111,3 +111,8 @@ func (filter *Filter) Validate(text string) (bool, string) { func (filter *Filter) RemoveNoise(text string) string { return filter.noise.ReplaceAllString(text, "") } + +func (filter *Filter) ValidateWithWildcard(text string, wildcard rune) (bool, string) { + text = filter.RemoveNoise(text) + return filter.trie.ValidateWithWildcard(text, wildcard) +} diff --git a/trie_tree.go b/trie_tree.go index d17ea66..4166c69 100644 --- a/trie_tree.go +++ b/trie_tree.go @@ -173,6 +173,58 @@ func (tree *Trie) Validate(text string) (bool, string) { return true, Empty } +func (tree *Trie) ValidateWithWildcard(text string, wildcard rune) (bool, string) { + + runes := []rune(text) + + for curl := 0; curl < len(runes); curl++ { + + patter := "" + parent := tree.Root + if tree.dfs(runes, parent, curl, wildcard, "", &patter) { + return false, patter + } + } + return true, "" +} + +func (tree *Trie) dfs(runes []rune, parent *Node, curl int, wildcard rune, str string, patter *string) bool { + + if parent == nil { + return false + } + if parent.IsPathEnd() { + *patter = str + return true + } + if curl >= len(runes) { + return false + } + + // 匹配到了 + if current, found := parent.Children[runes[curl]]; found { + if is1 := tree.dfs(runes, current, curl+1, wildcard, str+string(runes[curl]), patter); is1 { + return true + } + } + + // 先看有没有* + if current1, found1 := parent.Children[wildcard]; found1 { + + if is2 := tree.dfs(runes, current1, curl+1, wildcard, str+string(wildcard), patter); is2 { + return true + } + + if current2, found2 := current1.Children[runes[curl]]; found2 { + if is3 := tree.dfs(runes, current2, curl+1, wildcard, str+string(wildcard)+string(runes[curl]), patter); is3 { + return true + } + } + } + return false + +} + // FindIn 判断text中是否含有词库中的词 func (tree *Trie) FindIn(text string) (bool, string) { validated, first := tree.Validate(text)