Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

바이너리로 데이터를 강제하기(데이터 정확성이 올라감(?)) 위해서 COLLATE utf8mb4_bin 를 사용한건가요?!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

맞습니다! 한글 자/모를 분리할 때 유니코드를 정확히 비교하기 위해서 사용했습니다!

Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.dreamypatisiel.devdevdev.domain.entity;

import jakarta.persistence.*;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;

/**
 * JPA entity for a searchable tech keyword.
 *
 * <p>Besides the keyword itself, two derived search keys are stored: {@code jamoKey} and
 * {@code chosungKey}. All three string columns are declared with the {@code utf8mb4_bin}
 * collation so that values are compared by their exact code points.
 *
 * <p>NOTE(review): the {@code @Index} entries below declare indexes on {@code chosung_key} and
 * {@code jamo_key}. Queries that use MySQL {@code MATCH ... AGAINST} need FULLTEXT indexes on those
 * columns; nothing in this class declares them as FULLTEXT - confirm they are created elsewhere
 * (e.g. in a migration script).
 */
@Entity
@Getter
@NoArgsConstructor(access = AccessLevel.PROTECTED)
@Table(indexes = {
@Index(name = "idx_tech_keyword_01", columnList = "chosung_key"),
@Index(name = "idx_tech_keyword_02", columnList = "jamo_key")
})
public class TechKeyword extends BasicTime {
// Surrogate primary key generated by the database (IDENTITY strategy).
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
private Long id;

// The keyword text (max 100 characters, not null).
@Column(nullable = false, length = 100, columnDefinition = "varchar(100) COLLATE utf8mb4_bin")
private String keyword;

// Search key (max 300 characters, not null).
// Presumably the keyword decomposed into individual jamo - confirm against the code that writes it.
@Column(nullable = false, length = 300, columnDefinition = "varchar(300) COLLATE utf8mb4_bin")
private String jamoKey;

// Search key (max 150 characters, not null).
// Presumably the initial consonants (chosung) of the keyword - confirm against the code that writes it.
@Column(nullable = false, length = 150, columnDefinition = "varchar(150) COLLATE utf8mb4_bin")
private String chosungKey;

/**
 * Creates a keyword with its two search keys; exposed through the Lombok-generated builder.
 *
 * @param keyword    the keyword text
 * @param jamoKey    the value stored in the {@code jamoKey} column
 * @param chosungKey the value stored in the {@code chosungKey} column
 */
@Builder
private TechKeyword(String keyword, String jamoKey, String chosungKey) {
this.keyword = keyword;
this.jamoKey = jamoKey;
this.chosungKey = chosungKey;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package com.dreamypatisiel.devdevdev.domain.repository.techArticle;

import com.dreamypatisiel.devdevdev.domain.entity.TechKeyword;
import com.dreamypatisiel.devdevdev.domain.repository.techArticle.custom.TechKeywordRepositoryCustom;
import org.springframework.data.jpa.repository.JpaRepository;

/**
 * Spring Data JPA repository for {@link TechKeyword} entities (primary key type {@code Long}).
 *
 * <p>Combines the standard {@link JpaRepository} operations with the custom search method declared
 * in {@link TechKeywordRepositoryCustom}.
 */
public interface TechKeywordRepository extends JpaRepository<TechKeyword, Long>, TechKeywordRepositoryCustom {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.dreamypatisiel.devdevdev.domain.repository.techArticle.custom;

import com.dreamypatisiel.devdevdev.domain.entity.TechKeyword;
import org.springframework.data.domain.Pageable;

import java.util.List;

/**
 * Custom query contract for {@link TechKeyword} that is not covered by the derived repository
 * methods.
 */
public interface TechKeywordRepositoryCustom {
/**
 * Searches keywords using a search expression for the jamo key and one for the chosung key.
 *
 * @param inputJamo    search expression applied to the jamo key
 * @param inputChosung search expression applied to the chosung key
 * @param pageable     paging information for the result
 * @return the matching keywords
 */
List<TechKeyword> searchKeyword(String inputJamo, String inputChosung, Pageable pageable);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package com.dreamypatisiel.devdevdev.domain.repository.techArticle.custom;

import com.dreamypatisiel.devdevdev.domain.entity.TechKeyword;
import com.querydsl.core.types.dsl.BooleanExpression;
import com.querydsl.core.types.dsl.Expressions;
import com.querydsl.core.types.dsl.NumberTemplate;
import com.querydsl.jpa.JPQLQueryFactory;
import lombok.RequiredArgsConstructor;
import org.springframework.data.domain.Pageable;

import java.util.List;

import static com.dreamypatisiel.devdevdev.domain.entity.QTechKeyword.techKeyword;

/**
 * Querydsl implementation of {@link TechKeywordRepositoryCustom}.
 *
 * <p>Builds its predicates and ordering from JPQL {@code function('match_against', ...)} templates.
 */
@RequiredArgsConstructor
public class TechKeywordRepositoryImpl implements TechKeywordRepositoryCustom {

// Name of the custom SQL function referenced by the JPQL templates in searchKeyword.
public static final String MATCH_AGAINST_FUNCTION = "match_against";
// Query factory used to build and execute the search query.
private final JPQLQueryFactory query;

/**
 * Finds keywords whose {@code jamoKey} or {@code chosungKey} has a {@code match_against} score
 * greater than 0.0 for the given inputs.
 *
 * <p>Results are ordered by the greater of the two scores (descending) and then by keyword
 * length (ascending). At most {@code pageable.getPageSize()} rows are returned.
 *
 * @param inputJamo    search expression matched against {@code jamoKey}
 * @param inputChosung search expression matched against {@code chosungKey}
 * @param pageable     only its page size is read, as the row limit
 * @return the matching keywords in ranking order
 */
@Override
public List<TechKeyword> searchKeyword(String inputJamo, String inputChosung, Pageable pageable) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

대략 이런 쿼리로 이해했는데요. 맞을까요?!

select tk.*
from tech_keyword tk
where match(tk.jamo_key) against('+ㅈㅏㅂㅏ' in boolean mode) > 0.0
   or match(tk.chosung_key) against('+ㅈㅂ' in boolean mode) > 0.0
order by greatest(
                 match(tk.jamo_key) against('+ㅈㅏㅂㅏ' in boolean mode),
                 match(tk.chosung_key) against('+ㅈㅂ' in boolean mode)
         ) desc,
         char_length(tk.keyword) asc
limit 20;

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

정확합니다! 실제로 호출하면 아래와 같은 쿼리가 작성됩니다.

select
        tk1_0.id,
        tk1_0.chosung_key,
        tk1_0.created_at,
        tk1_0.jamo_key,
        tk1_0.keyword,
        tk1_0.last_modified_at 
    from
        tech_keyword tk1_0 
    where
        match (tk1_0.jamo_key) against (? in boolean mode)>0.0 
        or match (tk1_0.chosung_key) against (? in boolean mode)>0.0 
    order by
        greatest(match (tk1_0.jamo_key) against (? in boolean mode),
                 match (tk1_0.chosung_key) against (? in boolean mode)) desc,
        character_length(tk1_0.keyword) 
    limit
        ?

// Predicate: the jamo key matches when its match_against score is positive.
BooleanExpression jamoMatch = Expressions.booleanTemplate(
"function('" + MATCH_AGAINST_FUNCTION + "', {0}, {1}) > 0.0",
techKeyword.jamoKey, inputJamo
);

// Predicate: the chosung key matches when its match_against score is positive.
BooleanExpression chosungMatch = Expressions.booleanTemplate(
"function('" + MATCH_AGAINST_FUNCTION + "', {0}, {1}) > 0.0",
techKeyword.chosungKey, inputChosung
);

// Expressions for computing the relevance scores (same function calls, without the comparison)
NumberTemplate<Double> jamoScore = Expressions.numberTemplate(Double.class,
"function('" + MATCH_AGAINST_FUNCTION + "', {0}, {1})",
techKeyword.jamoKey, inputJamo
);
NumberTemplate<Double> chosungScore = Expressions.numberTemplate(Double.class,
"function('" + MATCH_AGAINST_FUNCTION + "', {0}, {1})",
techKeyword.chosungKey, inputChosung
);

// NOTE(review): only pageable.getPageSize() is applied below; the offset and sort carried by
// the Pageable are not read, so every call returns the first page.
return query
.selectFrom(techKeyword)
.where(jamoMatch.or(chosungMatch))
.orderBy(
// Rows with the higher score come first
Expressions.numberTemplate(Double.class,
"GREATEST({0}, {1})", jamoScore, chosungScore).desc(),
// On equal score, shorter keywords come first
techKeyword.keyword.length().asc()
)
.limit(pageable.getPageSize())
.fetch();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package com.dreamypatisiel.devdevdev.domain.service.techArticle.keyword;

import com.dreamypatisiel.devdevdev.domain.entity.TechKeyword;
import com.dreamypatisiel.devdevdev.domain.repository.techArticle.TechKeywordRepository;
import com.dreamypatisiel.devdevdev.global.utils.HangulUtils;
import lombok.RequiredArgsConstructor;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.util.List;

/**
 * Read-only service that provides keyword autocomplete on top of {@link TechKeywordRepository}.
 */
@Service
@Transactional(readOnly = true)
@RequiredArgsConstructor
public class TechKeywordService {

    /** Maximum number of suggestions returned by {@link #autocompleteKeyword(String)}. */
    private static final int MAX_SUGGESTIONS = 20;

    private final TechKeywordRepository techKeywordRepository;

    /**
     * Returns autocomplete suggestions for the text the user has typed so far.
     *
     * <p>If the input contains Hangul it is first decomposed into jamo. Every whitespace-separated
     * token is then prefixed with {@code +} and the resulting expression is used to search both the
     * jamo key and the chosung key.
     *
     * <p>A {@code null} or blank prefix yields an empty list without querying the database.
     *
     * @Note:
     * @Author: 유소영
     * @Since: 2025.08.13
     * @param prefix the text typed by the user; may be {@code null} or blank
     * @return matching keywords (at most 20), never {@code null}
     */
    public List<String> autocompleteKeyword(String prefix) {
        // Nothing to search for: avoid sending a null/empty search expression to the database.
        if (prefix == null || prefix.isBlank()) {
            return List.of();
        }

        // Decompose into jamo only when the input actually contains Hangul.
        String processedInput = HangulUtils.hasHangul(prefix)
                ? HangulUtils.convertToJamo(prefix)
                : prefix;

        // Prefix every token with '+' for the boolean search expression.
        String booleanPrefix = convertToBooleanSearch(processedInput);
        Pageable pageable = PageRequest.of(0, MAX_SUGGESTIONS);

        // The same expression is used for both the jamo key and the chosung key.
        List<TechKeyword> techKeywords =
                techKeywordRepository.searchKeyword(booleanPrefix, booleanPrefix, pageable);

        // Expose only the keyword text to callers.
        return techKeywords.stream()
                .map(TechKeyword::getKeyword)
                .toList();
    }

    /**
     * Builds a boolean search expression by prefixing each whitespace-separated token with
     * {@code +} and joining the tokens with single spaces.
     *
     * <p>NOTE(review): tokens are passed through as typed. Characters such as {@code + - * " ( )}
     * are operators in MySQL boolean-mode full-text search, so input like {@code "c++"} becomes
     * {@code "+c++"} - confirm how the database handles such expressions and whether these
     * characters should be stripped or escaped here.
     *
     * @param searchTerm the raw search text
     * @return the boolean search expression, or {@code searchTerm} unchanged if it is
     *         {@code null} or contains only whitespace
     */
    private String convertToBooleanSearch(String searchTerm) {
        if (searchTerm == null || searchTerm.trim().isEmpty()) {
            return searchTerm;
        }

        // After trim() there is no leading/trailing whitespace, so turning every whitespace run
        // into " +" and prepending one '+' prefixes each token exactly once.
        return "+" + searchTerm.trim().replaceAll("\\s+", " +");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.dreamypatisiel.devdevdev.global.config;

import org.hibernate.boot.model.FunctionContributions;
import org.hibernate.boot.model.FunctionContributor;

import static org.hibernate.type.StandardBasicTypes.DOUBLE;

/**
 * Hibernate {@link FunctionContributor} that registers the {@code match_against} function, which
 * is rendered as {@code match (?1) against (?2 in boolean mode)} and typed as a double.
 */
public class CustomMySQLFunctionContributor implements FunctionContributor {
    private static final String MATCH_AGAINST_FUNCTION = "match_against";
    private static final String MATCH_AGAINST_PATTERN = "match (?1) against (?2 in boolean mode)";

    /**
     * Registers the {@code match_against} pattern in the function registry.
     *
     * @param functionContributions access to the function registry and type configuration
     */
    @Override
    public void contributeFunctions(FunctionContributions functionContributions) {
        var registry = functionContributions.getFunctionRegistry();
        var scoreType = functionContributions
                .getTypeConfiguration()
                .getBasicTypeRegistry()
                .resolve(DOUBLE);

        registry.registerPattern(MATCH_AGAINST_FUNCTION, MATCH_AGAINST_PATTERN, scoreType);
    }
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HangulUtils 클래스 멋있습니다!
혹시 static 메소드만 사용해서 abstract 으로 정의 하셨나요?!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

맞습니다! 외부에서 static 메소드를 호출하기도 하고, 다른 유틸 클래스와의 통일성을 맞추기 위해 추상클래스로 변경했습니다.

Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package com.dreamypatisiel.devdevdev.global.utils;

/**
 * Utility class for Hangul (Korean script) text processing.
 */
public abstract class HangulUtils {

// Code point range of precomposed Hangul syllables
private static final int HANGUL_START = 0xAC00; // '가'
private static final int HANGUL_END = 0xD7A3; // '힣'

// Code point range of the Hangul Jamo block (conjoining jamo)
private static final int JAMO_START = 0x1100; // first code point of the block (U+1100)
private static final int JAMO_END = 0x11FF; // last code point of the block (U+11FF)

// Code point range of the Hangul Compatibility Jamo block
private static final int COMPAT_JAMO_START = 0x3130; // block start; 'ㄱ' itself is U+3131
private static final int COMPAT_JAMO_END = 0x318F; // block end; 'ㆎ' itself is U+318E

// Constants used to decompose Hangul syllables
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CHOSUNG, JUNGSUNG, JONGSUNG 배열의 length 를 사용하지 않고, 따로 상수로 정의한 이유가 있을까요?!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

여러 곳에서 반복적으로 쓰여서 상수로 분리했습니다!

// Number of initial consonants, medial vowels and final consonants; these equal the lengths of
// the CHOSUNG, JUNGSUNG and JONGSUNG arrays below.
private static final int CHOSUNG_COUNT = 19;
private static final int JUNGSUNG_COUNT = 21;
private static final int JONGSUNG_COUNT = 28;

// Initial consonants (chosung), written as compatibility jamo
private static final char[] CHOSUNG = {
'ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ', 'ㅃ', 'ㅅ',
'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'
};

// Medial vowels (jungsung), written as compatibility jamo
private static final char[] JUNGSUNG = {
'ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ', 'ㅘ',
'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ'
};

// Final consonants (jongsung); index 0 means "no final consonant"
private static final char[] JONGSUNG = {
'\0', 'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ', 'ㄺ',
'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ',
'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'
};

/**
 * Reports whether the text contains at least one Hangul character
 * (precomposed syllable, jamo, or compatibility jamo).
 *
 * @param text the text to inspect; may be {@code null}
 * @return {@code true} if any character is Hangul; {@code false} for {@code null} or empty text
 */
public static boolean hasHangul(String text) {
    // An empty string has no characters, so anyMatch yields false for it as well.
    return text != null && text.chars().anyMatch(code -> isHangul((char) code));
}

/**
 * Decomposes every precomposed Hangul syllable in the text into its jamo.
 *
 * <p>Characters that are not precomposed syllables (non-Hangul characters and standalone jamo)
 * are copied unchanged.
 *
 * @param text the text to decompose; may be {@code null}
 * @return the decomposed text, or {@code text} itself when it is {@code null} or empty
 */
public static String convertToJamo(String text) {
    if (text == null || text.isEmpty()) {
        return text;
    }

    // Number of syllables that share one initial consonant.
    final int syllablesPerChosung = JUNGSUNG_COUNT * JONGSUNG_COUNT;
    StringBuilder jamo = new StringBuilder();

    for (int i = 0; i < text.length(); i++) {
        char current = text.charAt(i);

        if (!isCompleteHangul(current)) {
            // Not a precomposed syllable: keep the character as it is.
            jamo.append(current);
            continue;
        }

        int offset = current - HANGUL_START;
        int finalIndex = offset % JONGSUNG_COUNT;

        jamo.append(CHOSUNG[offset / syllablesPerChosung]);
        jamo.append(JUNGSUNG[(offset % syllablesPerChosung) / JONGSUNG_COUNT]);

        // Index 0 means the syllable has no final consonant.
        if (finalIndex > 0) {
            jamo.append(JONGSUNG[finalIndex]);
        }
    }

    return jamo.toString();
}

/**
 * Extracts the initial consonants (chosung) of the text.
 *
 * <p>Precomposed syllables are replaced by their initial consonant. Characters listed in the
 * chosung table and non-Hangul characters are copied unchanged. Any other Hangul character is
 * dropped.
 *
 * @param text the text to process; may be {@code null}
 * @return the extracted text, or {@code text} itself when it is {@code null} or empty
 */
public static String extractChosung(String text) {
    if (text == null || text.isEmpty()) {
        return text;
    }

    final int syllablesPerChosung = JUNGSUNG_COUNT * JONGSUNG_COUNT;
    StringBuilder initials = new StringBuilder();

    for (int i = 0; i < text.length(); i++) {
        char current = text.charAt(i);

        if (isCompleteHangul(current)) {
            // Precomposed syllable: keep only its initial consonant.
            initials.append(CHOSUNG[(current - HANGUL_START) / syllablesPerChosung]);
        } else if (isChosung(current) || !isHangul(current)) {
            // Already an initial consonant, or not Hangul at all: keep unchanged.
            initials.append(current);
        }
        // Remaining Hangul characters (vowels, finals, other jamo) are skipped.
    }

    return initials.toString();
}

/**
 * Checks whether the character is Hangul: a precomposed syllable, a jamo, or a compatibility jamo.
 */
private static boolean isHangul(char ch) {
    if (isCompleteHangul(ch)) {
        return true;
    }
    if (isJamo(ch)) {
        return true;
    }
    return isCompatJamo(ch);
}

/**
 * Checks whether the character lies in the precomposed Hangul syllable range
 * ({@code HANGUL_START} to {@code HANGUL_END}, inclusive).
 */
private static boolean isCompleteHangul(char ch) {
    return HANGUL_START <= ch && ch <= HANGUL_END;
}

/**
 * Checks whether the character lies in the jamo range
 * ({@code JAMO_START} to {@code JAMO_END}, inclusive).
 */
private static boolean isJamo(char ch) {
    return JAMO_START <= ch && ch <= JAMO_END;
}

/**
 * Checks whether the character lies in the compatibility jamo range
 * ({@code COMPAT_JAMO_START} to {@code COMPAT_JAMO_END}, inclusive).
 */
private static boolean isCompatJamo(char ch) {
    return COMPAT_JAMO_START <= ch && ch <= COMPAT_JAMO_END;
}

/**
 * Checks whether the character is one of the entries of the {@code CHOSUNG} table.
 */
private static boolean isChosung(char ch) {
    int index = 0;
    while (index < CHOSUNG.length) {
        if (CHOSUNG[index] == ch) {
            return true;
        }
        index++;
    }
    return false;
}
}
Loading