Skip to content

Commit

Permalink
화장실 검색어 매칭 단순화 (#410)
Browse files Browse the repository at this point in the history
toilet simple search
  • Loading branch information
sanggggg authored Oct 28, 2024
1 parent 6a77411 commit bf28ace
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 112 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class ExternalAccessibilitySearchService(
}
.filter {
searchText ?: return@filter true
searchText.isSimilarWith(it.name)
it.name.isSimilarWith(pattern = searchText)
}
}
}
Original file line number Diff line number Diff line change
@@ -1,120 +1,25 @@
package club.staircrusher.stdlib.util.string

fun String.emptyToNull() = this.ifBlank { null }
import java.util.*

// https://en.wikipedia.org/wiki/Levenshtein_distance
// String similarity based on Levenshtein distance, with Korean (Hangul) syllables compared by their decomposed jamo.
fun String.isSimilarWith(other: String, maxThreshold: Int = 3): Boolean {
val similarity = jamoLevenshtein(this, other)
return similarity <= maxThreshold
}
fun String.emptyToNull() = this.ifBlank { null }

private fun Char.decomposeHangul(): List<Char>? {
val result = mutableListOf<Char>()
val choseongs =
listOf('ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ')
val joongseongs =
listOf('ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ', 'ㅘ', 'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ')
val jongseongs = listOf(
null,
'ㄱ',
'ㄲ',
'ㄳ',
'ㄴ',
'ㄵ',
'ㄶ',
'ㄷ',
'ㄹ',
'ㄺ',
'ㄻ',
'ㄼ',
'ㄽ',
'ㄾ',
'ㄿ',
'ㅀ',
'ㅁ',
'ㅂ',
'ㅄ',
'ㅅ',
'ㅆ',
'ㅇ',
'ㅈ',
'ㅊ',
'ㅋ',
'ㅌ',
'ㅍ',
'ㅎ'
fun String.isSimilarWith(pattern: String): Boolean {
return simpleMatch(
this.lowercase(Locale.US),
pattern.lowercase(Locale.US).filter { it.isWhitespace().not() }
)
val char = this
val codePoint = char.code

if (codePoint in 44032..55203) {
val baseCode = codePoint - 44032
val choseongIndex = baseCode / 21 / 28
val joongseongIndex = baseCode / 28 % 21
val jongseongIndex = baseCode % 28

result.addAll(
listOfNotNull(choseongs[choseongIndex], joongseongs[joongseongIndex], jongseongs[jongseongIndex]),
)
} else {
return null
}
return result
}


private fun levenshtein(s1: String, s2: String, cost: Map<Pair<Char, Char>, Int> = emptyMap()): Int {
if (s1.length < s2.length) {
return levenshtein(s2, s1, cost)
}

if (s2.isEmpty()) {
return s1.length
}

val previousRow = IntArray(s2.length + 1) { it }
for (i in s1.indices) {
val currentRow = IntArray(s2.length + 1)
currentRow[0] = i + 1
for (j in s2.indices) {
val insertion = previousRow[j + 1] + 1
val deletion = currentRow[j] + 1
val substitution = previousRow[j] + if (s1[i] == s2[j]) 0 else cost.getOrDefault(s1[i] to s2[j], 1)
currentRow[j + 1] = minOf(insertion, deletion, substitution)
private fun simpleMatch(text: String, pattern: String): Boolean {
var patternIndex = 0
for (char in text) {
if (patternIndex < pattern.length && pattern[patternIndex] == char) {
patternIndex++
}
previousRow.indices.forEach { previousRow[it] = currentRow[it] } // Optimized copy
}
return previousRow.last()
}

private fun jamoLevenshtein(s1: String, s2: String): Int {
if (s1.length < s2.length) {
return jamoLevenshtein(s2, s1)
}

if (s2.isEmpty()) {
return s1.length
}

val previousRow = IntArray(s2.length + 1) { it }
for (i in s1.indices) {
val currentRow = IntArray(s2.length + 1)
currentRow[0] = i + 1
for (j in s2.indices) {
val insertion = previousRow[j + 1] + 1
val deletion = currentRow[j] + 1
val substitution = previousRow[j] + getJamoCost(s1[i], s2[j])
currentRow[j + 1] = minOf(insertion, deletion, substitution)
if (patternIndex == pattern.length) {
return true
}
previousRow.indices.forEach { previousRow[it] = currentRow[it] } // Optimized copy
}
return previousRow.last()
}

private fun getJamoCost(c1: Char, c2: Char): Int {
if (c1 == c2) return 0
val jamo1 = c1.decomposeHangul()
val jamo2 = c2.decomposeHangul()
return if (jamo1 != null && jamo2 != null) levenshtein(jamo1.joinToString(""), jamo2.joinToString("")) / 3 else 1
return false
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ class StringUtilTest {
assert("농협하나로마트".isSimilarWith("농하나로마트"))
assert("농협하나로마트".isSimilarWith("하나로마트"))
assert("농협하나로마트".isSimilarWith("농협마트"))
assert("농협하나로마트".isSimilarWith("농협"))
assert("NonghyupMart".isSimilarWith("NonghyupMart"))
assert("농협하나로마트".isSimilarWith("농협허너루미틋"))
assert("NonghyupMart".isSimilarWith("nonghyup"))

assert(!"농협하나로마트".isSimilarWith("아무런"))
assert(!"농협하나로마트".isSimilarWith("asdf"))
assert(!"농협하나로마트".isSimilarWith("농협"))
}
}

0 comments on commit bf28ace

Please sign in to comment.