From 5bb90458a4e8605e4c69934778065ad397bb1617 Mon Sep 17 00:00:00 2001 From: chanbyeong <122460524+chanbyoung@users.noreply.github.com> Date: Sat, 10 May 2025 14:36:10 +0900 Subject: [PATCH 01/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#12=20=EA=B3=B5?= =?UTF-8?q?=ED=86=B5=20=EC=9D=91=EB=8B=B5=20=EB=B0=8F=20=EC=A0=84=EC=97=AD?= =?UTF-8?q?=20=EC=98=A4=EB=A5=98=20=EC=B2=98=EB=A6=AC=20=EA=B5=AC=ED=98=84?= =?UTF-8?q?=20(#25)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ Feat: 공통 응답 처리 구현 * ✨ Feat: 전역 예외 처리 구현 * ✨ Feat: 예외 처리 예시 코드 구현 * ♻️ Refactor: HTTP 메서드 수정 * 📦 Chore: yml `open-api` 설정 추가 * ✨ Feat: 로깅 처리 추가 * 💄 Style: 주석문 날짜 변경 * ♻️ Refactor: 수정 사항 반영 * ♻️ Refactor: 임시 폴더 UseCase추가 * 📦 Chore: 누락된 설정 추가 * ♻️ Refactor: 리소스 핸들러 삭제 * ♻️ Refactor: 핸들러 위치 변경 * ♻️ Refactor: static 제거 * ♻️ Refactor: Handler ErrorCode 분리 * ♻️ Refactor: sample 코드 분리 --- .../common/exception/CustomException.java | 25 +++ .../common/exception/error/ErrorCode.java | 61 ++++++ .../exception/error/GlobalErrorCode.java | 44 ++++ .../handler/GlobalExceptionHandler.java | 192 ++++++++++++++++++ .../batch/common/response/ApiResponse.java | 57 ++++++ .../application/ExceptionSampleService.java | 39 ++++ .../in/ExceptionSampleUseCase.java | 7 + .../exception/exception/SampleException.java | 24 +++ .../error/ExceptionSampleErrorCode.java | 30 +++ .../ExceptionSampleController.java | 26 +++ src/main/resources/application.yml | 7 + 11 files changed, 512 insertions(+) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/CustomException.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/ErrorCode.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/GlobalErrorCode.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/handler/GlobalExceptionHandler.java create mode 100644 
src/main/java/com/likelion/backendplus4/talkpick/batch/common/response/ApiResponse.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/ExceptionSampleService.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/in/ExceptionSampleUseCase.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/SampleException.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/error/ExceptionSampleErrorCode.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/presentation/ExceptionSampleController.java diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/CustomException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/CustomException.java new file mode 100644 index 0000000..ee82829 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/CustomException.java @@ -0,0 +1,25 @@ +package com.likelion.backendplus4.talkpick.batch.common.exception; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +/** + * 사용자 정의 예외의 추상 클래스 애플리케이션 전역에서 사용하는 공통 예외 상위 타입이다. 
+ * + * @modified 2025-05-09 + * @since 2025-05-09 + */ +public abstract class CustomException extends RuntimeException { + + + // 메시지만 포함하는 기본 생성자 + public CustomException(ErrorCode errorCode) { + super(errorCode.message()); + } + + // 메시지 + 원인 예외 포함하는 생상자 + public CustomException(ErrorCode errorCode, Throwable cause) { + super(errorCode.message(), cause); + } + + public abstract ErrorCode getErrorCode(); +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/ErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/ErrorCode.java new file mode 100644 index 0000000..484d375 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/ErrorCode.java @@ -0,0 +1,61 @@ +package com.likelion.backendplus4.talkpick.batch.common.exception.error; + +import org.springframework.http.HttpStatus; + +/** + * 에러 코드 인터페이스 각 에러 항목에 대한 HTTP 상태, 에러 번호, 메시지를 제공한다. + * A[BB][CCC] + * A (1자리) : 에러 심각도 (1~5) + * 1: 클라이언트 오류 + * 2: 인증 관련 오류 + * 3: 사용자 관련 오류 + * 4: 서버 오류 + * 5: 시스템 오류 + * + * BB (2자리) : 도메인 코드 + * 10: 사용자 관련 (ex: USER_NOT_FOUND) + * 20: 인증 관련 (ex: AUTHORIZATION_FAILED) + * 30: DB 관련 오류 (ex: DB_CONNECTION_FAILED) + * 40: API 관련 오류 (ex: API_TIMEOUT) + * 50: 시스템 오류 (ex: INTERNAL_SERVER_ERROR) + * + * CCC (3자리) : 세부 오류 순번 + * 001: 첫 번째 오류 + * 002: 두 번째 오류 + * 003: 세 번째 오류, 등등 + * + * @modified 2025-05-09 + * @since 2025-05-09 + */ +public interface ErrorCode { + + /** + * HTTP 상태 반환 + * + * @return HTTP 상태 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + HttpStatus httpStatus(); + + /** + * 에러 코드 번호 반환 + * + * @return 에러 코드 번호 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + int codeNumber(); + + /** + * 에러 메시지 반환 + * + * @return 에러 메시지 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + String message(); +} diff --git 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/GlobalErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/GlobalErrorCode.java new file mode 100644 index 0000000..4571169 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/GlobalErrorCode.java @@ -0,0 +1,44 @@ +package com.likelion.backendplus4.talkpick.batch.common.exception.error; + +import lombok.AllArgsConstructor; +import lombok.Getter; + + +/** + * 에러 코드 인터페이스 각 에러 항목에 대한 HTTP 상태, 에러 번호, 메시지를 제공한다. + * A[BB][CCC] + * A (1자리) : 에러 심각도 (1~5) + * 1: 클라이언트 오류 + * 2: 인증 관련 오류 + * 3: 사용자 관련 오류 + * 4: 서버 오류 + * 5: 시스템 오류 + * + * BB (2자리) : 도메인 코드 + * 10: 사용자 관련 (ex: USER_NOT_FOUND) + * 20: 인증 관련 (ex: AUTHORIZATION_FAILED) + * 30: DB 관련 오류 (ex: DB_CONNECTION_FAILED) + * 40: API 관련 오류 (ex: API_TIMEOUT) + * 50: 시스템 오류 (ex: INTERNAL_SERVER_ERROR) + * + * CCC (3자리) : 세부 오류 순번 + * 001: 첫 번째 오류 + * 002: 두 번째 오류 + * 003: 세 번째 오류, 등등 + * + * @modified 2025-05-09 + * @since 2025-05-09 + */ +@AllArgsConstructor +@Getter +public enum GlobalErrorCode { + + ILLEGAL_ARGUMENT_CODE(14001), + NOT_FOUND_CODE(140002), + METHOD_ARGUMENT_NOT_VALID_CODE(300001), + BIND_EXCEPTION_CODE(300002), + INTERNAL_SERVER_ERROR_CODE(500000); + + private final int code; + +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/handler/GlobalExceptionHandler.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/handler/GlobalExceptionHandler.java new file mode 100644 index 0000000..28252c3 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/handler/GlobalExceptionHandler.java @@ -0,0 +1,192 @@ +package com.likelion.backendplus4.talkpick.batch.common.exception.handler; + +import static com.likelion.backendplus4.talkpick.batch.common.exception.error.GlobalErrorCode.*; + +import 
com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; +import com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.validation.BindException; +import org.springframework.web.bind.MethodArgumentNotValidException; +import org.springframework.web.bind.annotation.ExceptionHandler; +import org.springframework.web.bind.annotation.RestControllerAdvice; +import org.springframework.web.servlet.NoHandlerFoundException; + +/** + * 전역 예외 처리 클래스 + * 컨트롤러에서 발생한 예외를 공통적으로 처리한다. + * + * @modified 2025-05-09 + * @since 2025-05-09 + */ +@Slf4j +@RestControllerAdvice +public class GlobalExceptionHandler { + + /** + * CustomException 처리 + * ErrorCode 인터페이스 기반으로 확장 가능한 방식으로 처리한다. + * + * @param ex CustomException 객체 + * @return 에러 응답 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + @ExceptionHandler(CustomException.class) + public ResponseEntity> handleCustomException(CustomException ex) { + ErrorCode errorCode = ex.getErrorCode(); + return buildErrorResponse( + errorCode.httpStatus(), + errorCode.codeNumber(), + errorCode.message(), + ex + ); + } + + /** + * IllegalArgumentException 처리 + * 잘못된 파라미터에 대한 예외 응답 처리 + * + * @param ex 예외 객체 + * @return 에러 응답 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + @ExceptionHandler(IllegalArgumentException.class) + public ResponseEntity> handleIllegalArgumentException(IllegalArgumentException ex) { + return buildErrorResponse( + HttpStatus.BAD_REQUEST, + ILLEGAL_ARGUMENT_CODE.getCode(), + ex.getMessage(), + ex + ); + } + + /** + * MethodArgumentNotValidException 처리 + * 유효성 검사 실패에 대한 응답 처리 + * + * @param ex 예외 객체 + * @return 에러 응답 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 
2025-05-09 + */ + @ExceptionHandler(MethodArgumentNotValidException.class) + public ResponseEntity> handleMethodArgumentNotValidException(MethodArgumentNotValidException ex) { + String errorMessage = getErrorMessage(ex); + return buildErrorResponse( + HttpStatus.BAD_REQUEST, + METHOD_ARGUMENT_NOT_VALID_CODE.getCode(), + errorMessage, + ex + ); + } + + /** + * BindException 처리 + * 폼 바인딩 유효성 실패 시 처리 + * + * @param ex BindException 오류 + * @return 에러 응답 + * @author 박찬병 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + @ExceptionHandler(BindException.class) + public ResponseEntity> handleBindException(BindException ex) { + String errorMessage = getErrorMessage(ex); + return buildErrorResponse( + HttpStatus.BAD_REQUEST, + BIND_EXCEPTION_CODE.getCode(), + errorMessage, + ex + ); + } + + /** + * NoHandlerFoundException 처리 메서드 + * + * 클라이언트가 존재하지 않는 URL 경로로 요청했을 때 발생하는 + * NoHandlerFoundException을 잡아 404 Not Found 응답을 반환합니다. + * + * @param ex 요청한 경로에 매핑된 핸들러가 없음을 나타내는 예외 + * @return HTTP 404 상태와 표준화된 에러 페이로드를 담은 ResponseEntity + * @author 박찬병 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + @ExceptionHandler(NoHandlerFoundException.class) + public ResponseEntity> handleNoHandler(NoHandlerFoundException ex) { + return buildErrorResponse( + HttpStatus.NOT_FOUND, + NOT_FOUND_CODE.getCode(), + "요청하신 경로를 찾을 수 없습니다.", + ex + ); + } + + + /** + * 기타 모든 예외 처리 + * 정의되지 않은 예외는 내부 서버 오류로 응답 + * + * @param ex 예외 객체 + * @return 에러 응답 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + @ExceptionHandler(Exception.class) + public ResponseEntity> handleAllExceptions(Exception ex) { + return buildErrorResponse( + HttpStatus.INTERNAL_SERVER_ERROR, + INTERNAL_SERVER_ERROR_CODE.getCode(), + "알 수 없는 오류가 발생했습니다.", + ex + ); + } + + /** + * 공통 에러 응답 생성 메서드 + * 예외 로깅 후 ApiResponse.error를 통해 표준화된 에러 응답을 생성한다. 
+ * + * @param status HTTP 상태 코드 + * @param errorCode 에러 코드 (정수형) + * @param message 에러 메시지 + * @param ex 발생한 예외 객체 + * @return ResponseEntity> 형태의 에러 응답 + * @author 박찬병 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + private ResponseEntity> buildErrorResponse( + HttpStatus status, + int errorCode, + String message, + Throwable ex + ) { + log.error("{}: {}", ex.getClass().getSimpleName(), ex.getMessage(), ex); + return ApiResponse.error(status, String.valueOf(errorCode), message); + } + + /** + * BindingResult 분석 후 필드별 오류 메시지 조합 + * + * @param ex BindException 또는 MethodArgumentNotValidException 객체 + * @return 필드명과 메시지를 콤마로 연결한 오류 문자열 + * @author 박찬병 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + private String getErrorMessage(BindException ex) { + return ex.getBindingResult().getFieldErrors().stream() + .map(fe -> fe.getField() + ": " + fe.getDefaultMessage()) + .collect(Collectors.joining(", ")); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/response/ApiResponse.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/response/ApiResponse.java new file mode 100644 index 0000000..6d92a67 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/response/ApiResponse.java @@ -0,0 +1,57 @@ +package com.likelion.backendplus4.talkpick.batch.common.response; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; + +/** + * API 응답 포맷 클래스 정상 및 에러 응답을 통합된 형식으로 제공한다. 
+ * + * @since 2025-05-09 + * @modified 2025-05-09 + */ +@Getter +@JsonInclude(JsonInclude.Include.NON_NULL) +@Builder +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor +public class ApiResponse { + + private static final String SUCCESS_MESSAGE = "요청 성공"; + + private String errorCode; + private String message; + private T data; + + public static ResponseEntity> success() { + ApiResponse body = ApiResponse.builder() + .message(SUCCESS_MESSAGE) + .build(); + return ResponseEntity.ok(body); + } + + + public static ResponseEntity> success(T data) { + ApiResponse body = ApiResponse.builder() + .message(SUCCESS_MESSAGE) + .data(data) + .build(); + return ResponseEntity.ok(body); + } + + + public static ResponseEntity> error(HttpStatus status, String errorCode, + String message) { + ApiResponse body = ApiResponse.builder() + .errorCode(errorCode) + .message(message) + .build(); + return ResponseEntity.status(status).body(body); + } + +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/ExceptionSampleService.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/ExceptionSampleService.java new file mode 100644 index 0000000..ff9a7d9 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/ExceptionSampleService.java @@ -0,0 +1,39 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.exception.application; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.likelion.backendplus4.talkpick.batch.sample.common.exception.application.in.ExceptionSampleUseCase; +import com.likelion.backendplus4.talkpick.batch.sample.common.exception.exception.SampleException; +import com.likelion.backendplus4.talkpick.batch.sample.common.exception.exception.error.ExceptionSampleErrorCode; +import org.springframework.stereotype.Service; + +@Service +public class ExceptionSampleService 
implements ExceptionSampleUseCase { + + /** + * 예외 처리 예시 로직입니다. + */ + @Override + public String failCase1() { + boolean somethingWrong = true; + if (somethingWrong) { + throw new SampleException(ExceptionSampleErrorCode.SAMPLE_EXCEPTION); + } + return null; + } + + /** + * 예외 처리 예시 로직입니다. + * e를 담아서 반환하는 경우 입니다. + */ + @Override + public String failCase2(String arrayNode) { + try { + throw new JsonProcessingException("강제 예외 발생") { }; + } catch (JsonProcessingException e) { + // 원인 예외(e)를 함께 전달 + throw new SampleException(ExceptionSampleErrorCode.SAMPLE_EXCEPTION, e); + } + } + + +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/in/ExceptionSampleUseCase.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/in/ExceptionSampleUseCase.java new file mode 100644 index 0000000..d72e969 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/in/ExceptionSampleUseCase.java @@ -0,0 +1,7 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.exception.application.in; + +public interface ExceptionSampleUseCase { + String failCase1(); + String failCase2(String arrayNode); + +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/SampleException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/SampleException.java new file mode 100644 index 0000000..0e9568f --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/SampleException.java @@ -0,0 +1,24 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.exception.exception; + +import com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +public class SampleException extends CustomException { + + private 
final ErrorCode errorCode; + + public SampleException(ErrorCode errorCode) { + super(errorCode); + this.errorCode = errorCode; + } + + public SampleException(ErrorCode errorCode, Throwable cause) { + super(errorCode, cause); + this.errorCode = errorCode; + } + + @Override + public ErrorCode getErrorCode() { + return errorCode; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/error/ExceptionSampleErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/error/ExceptionSampleErrorCode.java new file mode 100644 index 0000000..29872c5 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/error/ExceptionSampleErrorCode.java @@ -0,0 +1,30 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.exception.exception.error; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; +import lombok.AllArgsConstructor; +import org.springframework.http.HttpStatus; + +@AllArgsConstructor +public enum ExceptionSampleErrorCode implements ErrorCode { + + SAMPLE_EXCEPTION(HttpStatus.INTERNAL_SERVER_ERROR, 440000, "실패"); + + private final HttpStatus status; + private final int code; + private final String message; + + @Override + public HttpStatus httpStatus() { + return status; + } + + @Override + public int codeNumber() { + return code; + } + + @Override + public String message() { + return message; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/presentation/ExceptionSampleController.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/presentation/ExceptionSampleController.java new file mode 100644 index 0000000..9094078 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/presentation/ExceptionSampleController.java @@ -0,0 +1,26 @@ +package 
com.likelion.backendplus4.talkpick.batch.sample.common.exception.presentation; + +import com.likelion.backendplus4.talkpick.batch.sample.common.exception.application.ExceptionSampleService; +import lombok.RequiredArgsConstructor; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequiredArgsConstructor +@RequestMapping("/temp") +public class ExceptionSampleController { + + private final ExceptionSampleService sampleService; + + + @GetMapping("/fail-case1") + public String failCase1() { + return sampleService.failCase1(); + } + + @GetMapping("/fail-case2") + public String failCase2() { + return sampleService.failCase2(null); + } +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 896c2ce..643f53e 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -1,5 +1,8 @@ server: port: ${WEB_PORT:8082} + error: + whitelabel: + enabled: false spring: application: @@ -17,6 +20,10 @@ spring: hibernate: format_sql: true open-in-view: false + web: + resources: + add-mappings: false + logging: pattern: file: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%level] [%thread] [%logger{36}] - %msg%n" From e3d54a72e8a67d09e9319fc22d14bccc6e401e3f Mon Sep 17 00:00:00 2001 From: chanbyeong <122460524+chanbyoung@users.noreply.github.com> Date: Sat, 10 May 2025 17:35:55 +0900 Subject: [PATCH 02/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#13=20p6spy=20?= =?UTF-8?q?=EC=A0=81=EC=9A=A9=20(#32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📦 Chore: p6spy 설치 * ✨ Feat: p6spy 적용 * ♻️ Refactor: config 폴더 이동 * ♻️ Refactor: 수정 사항 반영 --- build.gradle | 4 + .../configuraition/p6spy/P6spyConfig.java | 147 ++++++++++++++++++ src/main/resources/application.yml | 6 + 3 files changed, 157 insertions(+) create mode 100644 
src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuraition/p6spy/P6spyConfig.java diff --git a/build.gradle b/build.gradle index 8993aee..3021d3a 100644 --- a/build.gradle +++ b/build.gradle @@ -54,6 +54,10 @@ dependencies { //Swagger implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.7.0' + //P6spy + implementation "com.github.gavlyukovskiy:p6spy-spring-boot-starter:1.9.0" + + } tasks.named('test') { diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuraition/p6spy/P6spyConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuraition/p6spy/P6spyConfig.java new file mode 100644 index 0000000..04d7190 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuraition/p6spy/P6spyConfig.java @@ -0,0 +1,147 @@ +package com.likelion.backendplus4.talkpick.batch.common.configuraition.p6spy; + +import com.p6spy.engine.logging.Category; +import com.p6spy.engine.spy.P6SpyOptions; +import com.p6spy.engine.spy.appender.MessageFormattingStrategy; +import jakarta.annotation.PostConstruct; +import java.util.Locale; +import org.hibernate.engine.jdbc.internal.FormatStyle; +import org.springframework.context.annotation.Configuration; + +/** + * P6Spy SQL 로깅 설정 클래스 + * P6Spy 의 MessageFormattingStrategy 를 구현하여, + * SQL 로그를 카테고리, 실행 시간, 포맷된 쿼리로 출력하도록 커스터마이징합니다. + * + * @since 2025-05-09 + * @modified 2025-05-09 + */ +@Configuration +public class P6spyConfig implements MessageFormattingStrategy { + + /** + * Spring 컨텍스트 초기화 직후 호출되어, P6Spy 옵션에 이 클래스의 포맷터를 사용하도록 등록합니다. + * + * @author 박찬병 + * @modified 2025-05-09 + * @since 2025-05-09 + */ + @PostConstruct + public void setLogMessageFormat() { + // P6Spy 의 ActiveInstance 에 포맷터 클래스 이름을 지정 + P6SpyOptions.getActiveInstance() + .setLogMessageFormat(this.getClass().getName()); + } + + /** + * 실제 로그 메시지를 생성하는 엔트리 포인트 메서드. 카테고리에 따라 SQL 을 포맷팅하고, 실행 시간과 함께 출력합니다. 
+ * + * @param connectionId 커넥션 고유 ID + * @param now 로그 출력 시각 (문자열) + * @param elapsed 쿼리 실행 경과 시간 (ms) + * @param category P6Spy 로깅 카테고리 (STATEMENT, RESULT, COMMIT 등) + * @param prepared PreparedStatement 템플릿 (파라미터 바인딩 전 SQL) + * @param sql 바인딩된 실제 SQL + * @param url 데이터소스 URL + * @return 카테고리, 실행 시간, 포맷된 SQL 을 포함한 로그 문자열 + * @author 박찬병 + * @modified 2025-05-09 + * @since 2025-05-09 + */ + @Override + public String formatMessage( + int connectionId, + String now, + long elapsed, + String category, + String prepared, + String sql, + String url + ) { + sql = formatSql(category, sql); + return String.format("[%s] | %d ms | %s", category, elapsed, sql); + } + + /** + * SQL 문을 읽기 좋게 포맷팅합니다. DDL 문(create/alter/comment)인 경우에는 FormatStyle.DDL, 그 외 쿼리는 + * FormatStyle.BASIC 스타일을 적용합니다. + * + * @param category P6Spy 로깅 카테고리 + * @param sql 실제 실행된 SQL + * @return 포맷팅된 SQL (또는 SQL 이 비어있으면 원본 반환) + * @author 박찬병 + * @modified 2025-05-09 + * @since 2025-05-10 + */ + private String formatSql(String category, String sql) { + if (isEmptySql(sql)) { + return sql; + } + + if (isStatementCategory(category)) { + return formatStatementSql(sql); + } + + return sql; + } + + + /** + * SQL이 비어있는지 확인합니다. + * + * @param sql 실행된 SQL 문자열 + * @return 비어있으면 true, 아니면 false + * @author 박찬병 + * @modified 2025-05-10 + * @since 2025-05-10 + */ + private boolean isEmptySql(String sql) { + return sql == null || sql.isBlank(); + } + + /** + * 주어진 카테고리가 STATEMENT 인지 여부를 판단합니다. + * + * @param category P6Spy 로깅 카테고리 + * @return STATEMENT 카테고리이면 true, 아니면 false + * @author 박찬병 + * @modified 2025-05-10 + * @since 2025-05-10 + */ + private boolean isStatementCategory(String category) { + return Category.STATEMENT.getName().equals(category); + } + + /** + * STATEMENT 카테고리의 SQL을 포맷팅합니다. 
+ * + * @param sql 실행된 SQL 문자열 + * @return 포맷팅된 SQL + * @author 박찬병 + * @modified 2025-05-09 + * @since 2025-05-10 + */ + private String formatStatementSql(String sql) { + if (isDdlStatement(sql)) { + return FormatStyle.DDL.getFormatter().format(sql); + } else { + return FormatStyle.BASIC.getFormatter().format(sql); + } + } + + /** + * 주어진 SQL 문이 DDL(create/alter/comment) 문인지 여부를 판단합니다. + * + * @param sql 실행된 SQL 문자열 + * @return DDL 문이면 true, 아니면 false + * @author 박찬병 + * @modified 2025-05-09 + * @since 2025-05-10 + */ + private boolean isDdlStatement(String sql) { + String trimmedSQL = sql.trim().toLowerCase(Locale.ROOT); + return trimmedSQL.startsWith("create") + || trimmedSQL.startsWith("alter") + || trimmedSQL.startsWith("comment"); + } +} \ No newline at end of file diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 643f53e..e6955c2 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -28,3 +28,9 @@ logging: pattern: file: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%level] [%thread] [%logger{36}] - %msg%n" console: "%cyan(%d{yyyy-MM-dd HH:mm:ss.SSS}) %highlight(%-5level) %yellow([%thread]) %green(%logger{36}) - %msg%n" + +#p6spy +decorator: + datasource: + p6spy: + enable-logging: true \ No newline at end of file From 6e11be555a3748331e3e4b8a5049cbe0b73122e5 Mon Sep 17 00:00:00 2001 From: JUNG ANSIK Date: Sat, 10 May 2025 18:46:33 +0900 Subject: [PATCH 03/36] =?UTF-8?q?=E2=9C=A8Feature:=20#17=20logback=20?= =?UTF-8?q?=EA=B8=B0=EB=B0=98=20=EB=A1=9C=EA=B9=85=20=EC=8B=9C=EC=8A=A4?= =?UTF-8?q?=ED=85=9C=20=EA=B5=AC=EC=B6=95=20(#31)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ Feature/#17-LogBack-기반-로깅-시스템-구축 * ✨ feat: Logging을 위한 Annotation 4개를 추가하였습니다(Interface, Aspect 각 4개씩 추가) * ♻️ Refactor: Aspect 클래스에 @Order애노테이션을 추가하였습니다., 애노테이션 사용 예시를 추가하였습니다. * 📝 Docs: Logging기능 전반에 걸쳐 주석을 추가하였습니다. * 📦 Chore: Logging기능 패키지 구조를 변경하였습니다. 
--------- Co-authored-by: Yejeong, Ham --- .../annotation/logging/EntryExitLog.java | 25 +++ .../common/annotation/logging/LogJson.java | 25 +++ .../annotation/logging/LogMethodValues.java | 25 +++ .../annotation/logging/TimeTracker.java | 25 +++ .../aop/logging/EntryExitLogAspect.java | 63 +++++++ .../common/aop/logging/LogJsonAspect.java | 90 +++++++++ .../aop/logging/LogMethodValuesAspect.java | 66 +++++++ .../common/aop/logging/TimeTrackerAspect.java | 67 +++++++ .../batch/common/configuration/WebConfig.java | 36 ++++ .../configuration/logging/LogbackConfig.java | 178 ++++++++++++++++++ .../decorator/logging/MdcTaskDecorator.java | 41 ++++ .../interceptor/logging/LogInterceptor.java | 89 +++++++++ .../common/logging/TestLoggingController.java | 24 +++ .../common/logging/TestLoggingRequest.java | 11 ++ .../common/logging/TestLoggingService.java | 18 ++ src/main/resources/application.yml | 12 +- 16 files changed, 791 insertions(+), 4 deletions(-) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/EntryExitLog.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogJson.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogMethodValues.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/TimeTracker.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/EntryExitLogAspect.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogJsonAspect.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogMethodValuesAspect.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/TimeTrackerAspect.java create mode 100644 
src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/WebConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/logging/LogbackConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/decorator/logging/MdcTaskDecorator.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/interceptor/logging/LogInterceptor.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingController.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingRequest.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingService.java diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/EntryExitLog.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/EntryExitLog.java new file mode 100644 index 0000000..b6326a1 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/EntryExitLog.java @@ -0,0 +1,25 @@ +package com.likelion.backendplus4.talkpick.batch.common.annotation.logging; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * 메서드의 진입 및 종료 시점을 기록하기 위한 애노테이션 + * + * @since 2025-05-10 + */ +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@Documented +public @interface EntryExitLog { + /** + * 기록할 로그 레벨을 지정한다. 
+ * + * @return 로그 레벨 문자열 (예: "debug", "info") + * @since 2025-05-10 + */ + String logLevel() default "info"; +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogJson.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogJson.java new file mode 100644 index 0000000..e7cd758 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogJson.java @@ -0,0 +1,25 @@ +package com.likelion.backendplus4.talkpick.batch.common.annotation.logging; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * 메서드의 진입 및 종료 시점을 기록하기 위한 애노테이션 + * + * @since 2025-05-10 + */ +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@Documented +public @interface LogJson { + /** + * 기록할 로그 레벨을 지정한다. + * + * @return 로그 레벨 문자열 (예: "debug", "info") + * @since 2025-05-10 + */ + String logLevel() default "info"; +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogMethodValues.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogMethodValues.java new file mode 100644 index 0000000..6ae5f26 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogMethodValues.java @@ -0,0 +1,25 @@ +package com.likelion.backendplus4.talkpick.batch.common.annotation.logging; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * 메서드의 진입 및 종료 시점을 기록하기 위한 애노테이션 + * + * @since 2025-05-10 + */ +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@Documented +public @interface LogMethodValues { + /** + * 기록할 로그 레벨을 지정한다. 
+ * + * @return 로그 레벨 문자열 (예: "debug", "info") + * @since 2025-05-10 + */ + String logLevel() default "info"; +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/TimeTracker.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/TimeTracker.java new file mode 100644 index 0000000..ddec90f --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/TimeTracker.java @@ -0,0 +1,25 @@ +package com.likelion.backendplus4.talkpick.batch.common.annotation.logging; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Marks a method whose execution time is measured and logged by TimeTrackerAspect. + * + * @since 2025-05-10 + */ +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@Documented +public @interface TimeTracker { + /** + * Log level to write at. + * + * @return log level string (e.g. "debug", "info"); TimeTrackerAspect logs at debug for "debug" and at info for any other value + * @since 2025-05-10 + */ + String logLevel() default "info"; +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/EntryExitLogAspect.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/EntryExitLogAspect.java new file mode 100644 index 0000000..6aab7c3 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/EntryExitLogAspect.java @@ -0,0 +1,63 @@ +package com.likelion.backendplus4.talkpick.batch.common.aop.logging; + +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; + +import lombok.extern.slf4j.Slf4j; + +/** + * EntryExitLog 애노테이션이 적용된 메서드의 진입과 종료 시점을 로그로 기록하는 
AOP 클래스 + * + * @since 2025-05-10 + */ +@Aspect +@Component +@Slf4j +@Order(1) +public class EntryExitLogAspect { + + /** + * EntryExitLog 애노테이션이 적용된 메서드를 감싸서 시작 전과 종료 후에 로그를 출력한다. + * + * @param pjp 실행 중인 JoinPoint + * @param entryExitLog EntryExitLog 애노테이션 정보 + * @return 메서드 실행 결과 객체 + * @throws Throwable 메서드 실행 중 발생한 예외 + * @author 정안식 + * @since 2025-05-10 + */ + @Around("@annotation(entryExitLog)") + public Object logAround(ProceedingJoinPoint pjp, EntryExitLog entryExitLog) throws Throwable { + String className = pjp.getTarget().getClass().getSimpleName(); + String method = pjp.getSignature().toShortString(); + String logLevel = entryExitLog.logLevel().toLowerCase(); + log(logLevel, "{}-{} 메서드 시작", className, method); + + Object result = pjp.proceed(); + + log(logLevel, "{}-{} 메서드 종료", className, method); + return result; + } + + /** + * 지정된 로그 레벨에 따라 메시지를 출력한다. + * + * @param logLevel 로그 레벨 (debug 또는 info) + * @param format 출력할 메시지 포맷 + * @param args 포맷에 전달할 인자 + * @author 정안식 + * @since 2025-05-10 + */ + private void log(String logLevel, String format, Object... 
args) { + if ("debug".equals(logLevel)) { + log.debug(format, args); + } else { + log.info(format, args); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogJsonAspect.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogJsonAspect.java new file mode 100644 index 0000000..d007d30 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogJsonAspect.java @@ -0,0 +1,90 @@ +package com.likelion.backendplus4.talkpick.batch.common.aop.logging; + +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogJson; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * LogJson 애노테이션이 적용된 메서드의 입력값과 반환값을 JSON 형식으로 변환하여 로그로 기록하는 AOP 클래스 + * + * @since 2025-05-10 + */ +@Aspect +@Component +@Slf4j +@RequiredArgsConstructor +@Order(3) +public class LogJsonAspect { + + private final ObjectMapper objectMapper; + + /** + * LogJson 애노테이션이 적용된 메서드의 입력값과 반환값을 JSON 문자열로 변환하여 로그로 출력한다. 
+ * + * @param pjp 실행 중인 JoinPoint + * @param logJson LogJson 애노테이션 정보 + * @return 메서드 실행 결과 + * @throws Throwable 실행 중 발생한 예외 + * @author 정안식 + * @since 2025-05-10 + */ + @Around("@annotation(logJson)") + public Object logJson(ProceedingJoinPoint pjp, LogJson logJson) throws Throwable { + String className = pjp.getTarget().getClass().getSimpleName(); + String method = pjp.getSignature().toShortString(); + String logLevel = logJson.logLevel().toLowerCase(); + + logJsonSafely(logLevel, "{}-{} 메서드 [JSON 입력값] - {}", className, method, pjp.getArgs()); + + Object result = pjp.proceed(); + + logJsonSafely(logLevel, "{}-{} 메서드 [JSON 반환값] - {}", className, method, result); + + return result; + } + + /** + * JSON 변환에 실패하지 않도록 안전하게 로그를 출력한다. + * + * @param logLevel 로그 레벨 (debug 또는 info) + * @param format 로그에 출력할 메시지 포맷 + * @param className 클래스 이름 문자열 + * @param method 메서드 정보 문자열 + * @param target 변환 대상 객체 + * @author 정안식 + * @since 2025-05-10 + */ + private void logJsonSafely(String logLevel, String format, String className, String method, Object target) { + try { + String json = objectMapper.writeValueAsString(target); + log(logLevel, format, className, method, json); + } catch (Exception e) { + log.warn("{}-{} 메서드 JSON 변환 실패", className, method, e); + } + } + + /** + * 지정된 로그 레벨에 따라 메시지를 출력한다. + * + * @param logLevel 로그 레벨 (debug 또는 info) + * @param format 출력할 메시지 포맷 + * @param args 포맷에 전달할 인자 + * @author 정안식 + * @since 2025-05-10 + */ + private void log(String logLevel, String format, Object... 
args) { + if ("debug".equals(logLevel)) { + log.debug(format, args); + } else { + log.info(format, args); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogMethodValuesAspect.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogMethodValuesAspect.java new file mode 100644 index 0000000..60750cc --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogMethodValuesAspect.java @@ -0,0 +1,66 @@ +package com.likelion.backendplus4.talkpick.batch.common.aop.logging; + +import java.util.Arrays; + +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogMethodValues; + +import lombok.extern.slf4j.Slf4j; + +/** + * LogMethodValues 애노테이션이 적용된 메서드의 인자와 반환값을 로그로 기록하는 AOP 클래스 + * + * @since 2025-05-10 + */ +@Aspect +@Component +@Slf4j +@Order(1) +public class LogMethodValuesAspect { + + /** + * LogMethodValues 애노테이션이 적용된 메서드의 인자와 반환값을 로그로 출력한다. + * + * @param pjp 실행 중인 JoinPoint + * @param logMethodValues LogMethodValues 애노테이션 정보 + * @return 메서드 실행 결과 + * @throws Throwable 실행 중 발생한 예외 + * @author 정안식 + * @since 2025-05-10 + */ + @Around("@annotation(logMethodValues)") + public Object logArgsAndReturn(ProceedingJoinPoint pjp, LogMethodValues logMethodValues) throws Throwable { + String className = pjp.getTarget().getClass().getSimpleName(); + String method = pjp.getSignature().toShortString(); + String logLevel = logMethodValues.logLevel().toLowerCase(); + + log(logLevel, "{}-{}메서드[ARGS] - {}", className, method, Arrays.toString(pjp.getArgs())); + + Object result = pjp.proceed(); + + log(logLevel, "{}-{}메서드[RETURN] - {}", className, method, result); + return result; + } + + /** + * 지정된 로그 레벨에 따라 메시지를 출력한다. 
+ * + * @param logLevel 로그 레벨 (debug 또는 info) + * @param format 출력할 메시지 포맷 + * @param args 포맷에 전달할 인자 + * @author 정안식 + * @since 2025-05-10 + */ + private void log(String logLevel, String format, Object... args) { + if ("debug".equals(logLevel)) { + log.debug(format, args); + } else { + log.info(format, args); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/TimeTrackerAspect.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/TimeTrackerAspect.java new file mode 100644 index 0000000..1482610 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/TimeTrackerAspect.java @@ -0,0 +1,67 @@ +package com.likelion.backendplus4.talkpick.batch.common.aop.logging; + +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; + +import lombok.extern.slf4j.Slf4j; + +/** + * TimeTracker 애노테이션이 적용된 메서드의 실행 시간을 측정하여 로그로 남기는 AOP 클래스 + * + * @since 2025-05-10 + */ +@Aspect +@Component +@Slf4j +@Order(2) +public class TimeTrackerAspect { + + /** + * TimeTracker 애노테이션이 적용된 메서드를 감싸서 실행 시간을 기록하고 로그를 출력한다. 
+ * + * @param pjp the join point being executed + * @param timeTracker the TimeTracker annotation instance on the method + * @return the value returned by the intercepted method + * @throws Throwable whatever the intercepted method throws; the elapsed time is logged before it propagates + * @author 정안식 + * @since 2025-05-10 + */ + @Around("@annotation(timeTracker)") + public Object trackTime(ProceedingJoinPoint pjp, TimeTracker timeTracker) throws Throwable { + String method = pjp.getSignature().toShortString(); + String logLevel = timeTracker.logLevel().toLowerCase(); + + /* nanoTime is monotonic, so wall-clock adjustments cannot skew the measurement */ + long start = System.nanoTime(); + try { + return pjp.proceed(); + } finally { + /* finally: a call that throws is timed and logged as well */ + double elapsedSeconds = (System.nanoTime() - start) / 1_000_000_000.0; + String formatted = String.format("%.3f", elapsedSeconds); + log(logLevel, "{} 실행 시간 = {} 초", method, formatted); + } + } + + /** + * Writes the message at the requested log level. + * + * @param logLevel log level; "debug" logs at debug, anything else at info + * @param format message format + * @param args arguments for the format + * @author 정안식 + * @since 2025-05-10 + */ + private void log(String logLevel, String format, Object... args) { + if ("debug".equals(logLevel)) { + log.debug(format, args); + } else { + log.info(format, args); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/WebConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/WebConfig.java new file mode 100644 index 0000000..24b5c5c --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/WebConfig.java @@ -0,0 +1,36 @@ +package com.likelion.backendplus4.talkpick.batch.common.configuration; + +import org.springframework.context.annotation.Configuration; +import org.springframework.web.servlet.config.annotation.InterceptorRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +import com.likelion.backendplus4.talkpick.batch.common.interceptor.logging.LogInterceptor; + +import lombok.RequiredArgsConstructor; + +/** + * Web MVC configuration class that registers interceptors. + * + * @since 2025-05-10 + */ +@Configuration 
+@RequiredArgsConstructor +public class WebConfig implements WebMvcConfigurer { + + private static final String ALL_PATTERN = "/**"; + + private final LogInterceptor logInterceptor; + + /** + * LogInterceptor를 모든 경로에 등록한다. + * + * @param registry InterceptorRegistry 인터셉터 레지스트리 + * @author 정안식 + * @since 2025-05-10 + */ + @Override + public void addInterceptors(InterceptorRegistry registry) { + registry.addInterceptor(logInterceptor) + .addPathPatterns(ALL_PATTERN); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/logging/LogbackConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/logging/LogbackConfig.java new file mode 100644 index 0000000..a2f033f --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/logging/LogbackConfig.java @@ -0,0 +1,178 @@ +package com.likelion.backendplus4.talkpick.batch.common.configuration.logging; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Configuration; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.LoggerContext; +import ch.qos.logback.classic.encoder.PatternLayoutEncoder; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.ConsoleAppender; +import ch.qos.logback.core.FileAppender; +import ch.qos.logback.core.rolling.TimeBasedRollingPolicy; +import ch.qos.logback.core.util.FileSize; +import jakarta.annotation.PostConstruct; + +/** + * Logback 설정을 위한 구성 클래스. + * application.properties의 log.rolling.* 설정에 따라 + * 콘솔 및 파일 appender를 생성하고 루트 로거에 등록한다. 
+ * + * @since 2025-05-10 + */ +@Configuration +public class LogbackConfig { + @Value("${log.rolling.directory}") + private String LOG_DIRECTORY; + @Value("${log.rolling.file-name}") + private String LOG_FILE_NAME; + @Value("${log.rolling.pattern}") + private String LOG_PATTERN; + @Value("${log.rolling.max-history}") + private int MAX_HISTORY; + @Value("${log.rolling.total-size-cap}") + private String TOTAL_SIZE_CAP; + + /** + * 로그 설정을 초기화하고 콘솔 및 파일 appender를 구성한다. + * + * @author 정안식 + * @since 2025-05-10 + */ + @PostConstruct + public void configure() { + LoggerContext context = initializeLoggerContext(); + createLogDirectory(); + + ConsoleAppender consoleAppender = createConsoleAppender(context); + FileAppender fileAppender = createFileAppender(context); + + configureRootLogger(context, consoleAppender, fileAppender); + } + + /** + * LoggerContext를 초기화하고 리셋하여 반환한다. + * + * @return 초기화된 LoggerContext 객체 + * @author 정안식 + * @since 2025-05-10 + */ + private LoggerContext initializeLoggerContext() { + LoggerContext context = (LoggerContext)LoggerFactory.getILoggerFactory(); + context.reset(); + return context; + } + + /** + * 로그 디렉토리를 생성한다. 존재하지 않을 경우 새로 생성한다. + * + * @author 정안식 + * @since 2025-05-10 + */ + private void createLogDirectory() { + Path logPath = Paths.get(LOG_DIRECTORY); + try { + if (!Files.exists(logPath)) { + Files.createDirectories(logPath); + } + } catch (Exception e) { + throw new RuntimeException("로그 디렉토리 생성 실패", e); + } + } + + /** + * 콘솔 appender를 생성하여 반환한다. + * + * @param context LoggerContext 객체 + * @return 생성된 ConsoleAppender + * @author 정안식 + * @since 2025-05-10 + */ + private ConsoleAppender createConsoleAppender(LoggerContext context) { + ConsoleAppender appender = new ConsoleAppender<>(); + appender.setContext(context); + appender.setEncoder(createEncoder(context)); + appender.start(); + return appender; + } + + /** + * 파일 appender를 생성하여 반환한다. 
+ * + * @param context the LoggerContext the appender is bound to + * @return the started appender; it is a RollingFileAppender because a plain FileAppender has no rolling policy and would never roll over + * @since 2025-05-10 + */ + private FileAppender createFileAppender(LoggerContext context) { + ch.qos.logback.core.rolling.RollingFileAppender<ILoggingEvent> appender = new ch.qos.logback.core.rolling.RollingFileAppender<>(); /* fully qualified: RollingFileAppender is not imported by this file */ + appender.setContext(context); + appender.setFile(LOG_DIRECTORY + "/" + LOG_FILE_NAME); + appender.setAppend(true); + appender.setEncoder(createEncoder(context)); + + TimeBasedRollingPolicy rollingPolicy = createRollingPolicy(context, appender); + rollingPolicy.start(); + appender.setRollingPolicy(rollingPolicy); /* without this the started policy is never consulted */ + appender.start(); + return appender; + } + + /** + * Creates and starts a PatternLayoutEncoder that uses LOG_PATTERN. + * + * @param context the LoggerContext the encoder is bound to + * @return the started PatternLayoutEncoder + * @since 2025-05-10 + */ + private PatternLayoutEncoder createEncoder(LoggerContext context) { + PatternLayoutEncoder encoder = new PatternLayoutEncoder(); + encoder.setContext(context); + encoder.setPattern(LOG_PATTERN); + encoder.start(); + return encoder; + } + + /** + * Creates the daily rolling policy; the caller is responsible for starting it. + * + * @param context the LoggerContext the policy is bound to + * @param parent the file appender the policy belongs to + * @return the configured, not yet started TimeBasedRollingPolicy + * @since 2025-05-10 + */ + private TimeBasedRollingPolicy createRollingPolicy(LoggerContext context, + FileAppender parent) { + TimeBasedRollingPolicy policy = new TimeBasedRollingPolicy<>(); + policy.setContext(context); + policy.setParent(parent); + policy.setFileNamePattern(LOG_DIRECTORY + "/" + LOG_FILE_NAME.replace(".log", ".%d{yyyy-MM-dd}.log")); + policy.setMaxHistory(MAX_HISTORY); + policy.setTotalSizeCap(FileSize.valueOf(TOTAL_SIZE_CAP)); + return policy; + } + + /** + * 루트 로거에 레벨 설정 및 appender를 등록한다. 
+ * + * @param context LoggerContext 객체 + * @param consoleAppender ConsoleAppender 객체 + * @param fileAppender FileAppender 객체 + * @since 2025-05-10 + */ + private void configureRootLogger(LoggerContext context, ConsoleAppender consoleAppender, + FileAppender fileAppender) { + Logger logger = LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME); + if (logger instanceof ch.qos.logback.classic.Logger) { + ch.qos.logback.classic.Logger rootLogger = (ch.qos.logback.classic.Logger)logger; + rootLogger.setLevel(Level.INFO); + rootLogger.addAppender(consoleAppender); + rootLogger.addAppender(fileAppender); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/decorator/logging/MdcTaskDecorator.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/decorator/logging/MdcTaskDecorator.java new file mode 100644 index 0000000..d958d51 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/decorator/logging/MdcTaskDecorator.java @@ -0,0 +1,41 @@ +package com.likelion.backendplus4.talkpick.batch.common.decorator.logging; + +import java.util.Map; + +import org.slf4j.MDC; +import org.springframework.core.task.TaskDecorator; +import org.springframework.stereotype.Component; + +/** + * 스레드 풀에서 실행되는 Task에 MDC(Context Map)를 전파하기 위한 TaskDecorator 구현체 + * MDC 정보를 부모 스레드에서 자식 스레드로 복사하여 로그 추적 정보를 유지하도록 한다. + * + * @since 2025-05-10 + */ +@Component +public class MdcTaskDecorator implements TaskDecorator { + + /** + * Runnable 실행 시 부모 스레드의 MDC(Context Map)를 자식 스레드로 복사하여 설정한다. + * 실행 후 MDC를 반드시 clear하여 메모리 누수를 방지한다. 
+ * + * @param runnable the original task to run + * @return a new Runnable that installs the captured MDC context, runs the task and then clears MDC + * @author 정안식 + * @since 2025-05-10 + */ + @Override + public Runnable decorate(Runnable runnable) { + Map contextMap = MDC.getCopyOfContextMap(); + return () -> { + if (contextMap != null) { + MDC.setContextMap(contextMap); + } + try { + runnable.run(); + } finally { + MDC.clear(); + } + }; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/interceptor/logging/LogInterceptor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/interceptor/logging/LogInterceptor.java new file mode 100644 index 0000000..cb5d680 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/interceptor/logging/LogInterceptor.java @@ -0,0 +1,89 @@ +package com.likelion.backendplus4.talkpick.batch.common.interceptor.logging; + +import java.util.UUID; + +import org.slf4j.MDC; +import org.springframework.stereotype.Component; +import org.springframework.web.servlet.HandlerInterceptor; + +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import lombok.extern.slf4j.Slf4j; + +/** + * Interceptor used for logging. + * Generates a TraceId before each HTTP request, stores it in MDC and removes it afterwards so log lines can be correlated. + * + * @since 2025-05-10 + */ +@Slf4j +@Component +public class LogInterceptor implements HandlerInterceptor { + + /** + * Generates a TraceId and stores it in MDC before the request is handled. + * + * @param request the incoming HttpServletRequest + * @param response the HttpServletResponse + * @param handler the chosen handler object + * @return always true, so request processing continues + * @author 정안식 + * @since 2025-05-10 + */ + @Override + public boolean preHandle(HttpServletRequest request, + HttpServletResponse response, Object handler) { + String traceId = generateTraceId(); + setTraceId(traceId); + log.info("TraceId 생성 성공 - {}", traceId); + return true; + } + + /** + * 요청 처리 완료 후 MDC에 설정된 TraceId를 제거한다. 
+ * + * @param request HttpServletRequest 요청 객체 + * @param response HttpServletResponse 응답 객체 + * @param handler Object 핸들러 객체 + * @param ex Exception 발생 예외 객체 + * @author 정안식 + * @since 2025-05-10 + */ + @Override + public void afterCompletion(HttpServletRequest request, + HttpServletResponse response, Object handler, Exception ex) { + clearTraceId(); + } + + /** + * 새로운 UUID 형식의 TraceId를 생성한다. + * + * @return String 생성된 TraceId 문자열 + * @author 정안식 + * @since 2025-05-10 + */ + private String generateTraceId() { + return UUID.randomUUID().toString(); + } + + /** + * 생성된 TraceId를 MDC에 설정한다. + * + * @param traceId String 설정할 TraceId + * @author 정안식 + * @since 2025-05-10 + */ + private void setTraceId(String traceId) { + MDC.put("traceId", traceId); + } + + /** + * MDC에 설정된 모든 정보를 제거하여 메모리 누수를 방지한다. + * + * @author 정안식 + * @since 2025-05-10 + */ + private void clearTraceId() { + MDC.clear(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingController.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingController.java new file mode 100644 index 0000000..8e40f13 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingController.java @@ -0,0 +1,24 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.logging; + +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogJson; + +import lombok.RequiredArgsConstructor; + +@RestController +@RequestMapping("/testController") +@RequiredArgsConstructor +public class TestLoggingController { + private final TestLoggingService testLoggingService; + + @LogJson + @PostMapping + public String 
test(@RequestBody TestLoggingRequest testLoggingRequest) { + System.out.println("TestController 요청 성공"); + return testLoggingService.test(testLoggingRequest.getName()); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingRequest.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingRequest.java new file mode 100644 index 0000000..ca57ca1 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingRequest.java @@ -0,0 +1,11 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.logging; + +import lombok.Data; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +@Data +public class TestLoggingRequest { + private final String name; + private final int age; +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingService.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingService.java new file mode 100644 index 0000000..4114cdc --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingService.java @@ -0,0 +1,18 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.logging; + +import org.springframework.stereotype.Service; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogMethodValues; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; + +@Service +public class TestLoggingService { + @EntryExitLog + @LogMethodValues + @TimeTracker + public String test(String text) { + System.out.println("TestService 요청 성공"); + return "bye"; + } +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index e6955c2..b05181c 100644 --- a/src/main/resources/application.yml +++ 
b/src/main/resources/application.yml @@ -20,17 +20,21 @@ spring: hibernate: format_sql: true open-in-view: false +log: + rolling: + directory: logs + file-name: talkpick-batch.log + pattern: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - [TraceId: %X{traceId:-no-trace}] - %msg%n" + max-history: 30 + total-size-cap: 10MB web: resources: add-mappings: false - logging: pattern: file: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%level] [%thread] [%logger{36}] - %msg%n" console: "%cyan(%d{yyyy-MM-dd HH:mm:ss.SSS}) %highlight(%-5level) %yellow([%thread]) %green(%logger{36}) - %msg%n" - -#p6spy decorator: datasource: p6spy: - enable-logging: true \ No newline at end of file + enable-logging: true From 47c6fbc7204331b1076b1dc9adffa0d5b840c150 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sat, 10 May 2025 19:11:18 +0900 Subject: [PATCH 04/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20.gitignore=20?= =?UTF-8?q?=ED=8C=8C=EC=9D=BC=20=EC=88=98=EC=A0=95=20(#36)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 132 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 104 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 5c0b59b..a2ba527 100644 --- a/.gitignore +++ b/.gitignore @@ -1,37 +1,113 @@ -HELP.md +# ============================== +# Build and Gradle +# ============================== .gradle build/ +**/build/ +!src/**/build/ + +# Gradle wrapper !gradle/wrapper/gradle-wrapper.jar -!**/src/main/**/build/ -!**/src/test/**/build/ -*.env -### STS ### -.apt_generated +!gradle-wrapper.properties + +# ============================== +# IDE 관련 설정 +# ============================== + +# IntelliJ IDEA +.idea +*.iml +*.iws +*.ipr +/out/ +.idea/**/ +.idea_modules/ +.idea/httpRequests +.idea/codestream.xml + +# Eclipse / STS +.apt_generated/ +.apt_generated_test/ .classpath -.factorypath .project -.settings +.settings/ +.factorypath .springBeans .sts4-cache -bin/ -!**/src/main/**/bin/ 
-!**/src/test/**/bin/ +.externalToolBuilders/ +*.launch +*.pydevproject +.cproject +.autotools +.buildpath +.loadpath +.recommenders/ +.cache-main +.scala_dependencies +.worksheet -### IntelliJ IDEA ### -.idea -*.iws -*.iml -*.ipr -out/ -!**/src/main/**/out/ -!**/src/test/**/out/ - -### NetBeans ### -/nbproject/private/ -/nbbuild/ -/dist/ -/nbdist/ -/.nb-gradle/ - -### VS Code ### +# VS Code .vscode/ +.history/ +.ionide/ + +# ============================== +# Development files +# ============================== +*.env +*.http + +# ============================== +# OS/시스템 파일 +# ============================== +.DS_Store +.AppleDouble +.LSOverride +Icon +._* +.Spotlight-V100 +.Trashes +.fseventsd +.com.apple.timemachine.donotpresent +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk +*.icloud + +# Windows +Thumbs.db +ehthumbs.db +Thumbs.db:encryptable +ehthumbs_vista.db +*.stackdump +*.lnk +Desktop.ini +$RECYCLE.BIN/ + +# ============================== +# Java 관련 +# ============================== +*.class +*.log +*.jar +*.war +*.nar +*.ear +*.zip +*.tar.gz +*.rar +*.hprof +hs_err_pid* +replay_pid* + +# ============================== +# 기타 백업/임시파일 +# ============================== +*.tmp +*.bak +*.swp +*~.nib +*~ +~$* \ No newline at end of file From a51b2d8353c80e38f82da969c8ac0c495ec57a08 Mon Sep 17 00:00:00 2001 From: Atriel <118334518+Atriel1999@users.noreply.github.com> Date: Sat, 10 May 2025 21:05:21 +0900 Subject: [PATCH 05/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#28=20rss=20?= =?UTF-8?q?=EB=8D=B0=EC=9D=B4=ED=84=B0=20=ED=8C=8C=EC=8B=B1=20=EB=B0=8F=20?= =?UTF-8?q?=EB=A7=A4=ED=95=91=20(#29)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📦 Chore: application-dev.yml 파일 gitignore에 추가 * ✨ Feat: RSS 뉴스 도메인 모델 및 기본 구조 구현 * ✨ Feat: RSS Mapper 및 서비스 로직 구현 * ✨ Feat: RSS 데이터 수집용 스케쥴러 Quartz 스케줄러 구현 * 🐛Fix: RssNews 엔티티에서 @Data 어노테이션을 세분화된 Lombok 어노테이션으로 대체 - @Data 제거 후 @Getter로 전체 getter 생성 - 필요한 필드에만 선택적으로 
@Setter 적용하여 식별자 무결성 보호 - @ToString에 description 필드 제외하여 로깅 최적화 - @EqualsAndHashCode에 id 필드만 사용하여 객체 비교 최적화 * 🐛 Fix: existsByLink 부정 조건을 not() 함수로 개선하여 가독성 향상 * ✨ Feat: application-dev에서 prefix(카테고리)를 가져오는 구조에서 enum으로 변경 * 📦 Chore: application-dev제거 및 env구조로 변경, env관련 의존성 추가(donenv) * ♻️ Refactor: prefix(언론사이름 접미사) 사용 구조 변경, 코드 구조 리팩토링 * ♻️ Refactor: Quartz 실행주기 외부파일(application)으로 분리 * ✨ Feat: 디버깅 관련 RssErrorCode & RssException 추가 * ♻️ Refactor: 요다 조건식 변경 --- application.yml | 41 +++++ build.gradle | 6 +- .../talkpick/batch/rss/entity/RssNews.java | 64 ++++++++ .../batch/rss/exception/RssErrorCode.java | 37 +++++ .../batch/rss/exception/RssException.java | 62 +++++++ .../talkpick/batch/rss/model/RssSource.java | 84 ++++++++++ .../rss/repository/RssNewsRepository.java | 11 ++ .../batch/rss/scheduler/RssScheduler.java | 55 +++++++ .../batch/rss/service/RssMappingFactory.java | 43 +++++ .../batch/rss/service/RssService.java | 154 ++++++++++++++++++ .../rss/service/mapper/DongaRssMapper.java | 54 ++++++ .../rss/service/mapper/KhanRssMapper.java | 55 +++++++ .../rss/service/mapper/KmibRssMapper.java | 62 +++++++ .../batch/rss/service/mapper/RssMapper.java | 29 ++++ 14 files changed, 756 insertions(+), 1 deletion(-) create mode 100644 application.yml create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/entity/RssNews.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssErrorCode.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssException.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/model/RssSource.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/repository/RssNewsRepository.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/scheduler/RssScheduler.java create mode 100644 
src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssMappingFactory.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssService.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/DongaRssMapper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KhanRssMapper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KmibRssMapper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/RssMapper.java diff --git a/application.yml b/application.yml new file mode 100644 index 0000000..0fc5aec --- /dev/null +++ b/application.yml @@ -0,0 +1,41 @@ +spring: + datasource: + url: ${MYSQL_URL} + username: ${MYSQL_USERNAME} + password: ${MYSQL_PASSWORD} + driver-class-name: com.mysql.cj.jdbc.Driver + hikari: + maximum-pool-size: 20 + minimum-idle: 5 + idle-timeout: 30000 + connection-timeout: 20000 + + jpa: + hibernate: + ddl-auto: update + properties: + hibernate: + dialect: org.hibernate.dialect.MySQL8Dialect + format_sql: true + show-sql: true + ai: + openai: + api-key: ${OPENAI_API_KEY} + +server: + port: ${WEB_PORT:8080} + +logging: + level: + org: + hibernate: + SQL: DEBUG + type: + descriptor: + sql: + BasicBinder: TRACE + +rss: + scheduler: + cron: "0 */1 * * * ?" 
+ diff --git a/build.gradle b/build.gradle index 3021d3a..a921de4 100644 --- a/build.gradle +++ b/build.gradle @@ -29,6 +29,8 @@ dependencies { implementation 'org.springframework.boot:spring-boot-starter-web' implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-xml' + implementation 'me.paulschwarz:spring-dotenv:3.0.0' + testImplementation 'org.springframework.boot:spring-boot-starter-test' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' @@ -57,7 +59,9 @@ dependencies { //P6spy implementation "com.github.gavlyukovskiy:p6spy-spring-boot-starter:1.9.0" - + //RSS + implementation 'org.springframework.boot:spring-boot-starter-quartz' + implementation 'com.rometools:rome:1.18.0' } tasks.named('test') { diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/entity/RssNews.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/entity/RssNews.java new file mode 100644 index 0000000..3263f3f --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/entity/RssNews.java @@ -0,0 +1,64 @@ +package com.likelion.backendplus4.talkpick.batch.rss.entity; + +import jakarta.persistence.*; +import lombok.*; + +import java.time.LocalDateTime; + + +/** + * RSS 피드를 수집 객체 + * + * @author 양병학 + * @since 2025-05-10 최초 작성 + * @modify 2025-05-10 17:47 PR 수정 + * @ToString exclude로 대량의 텍스트필드 로그에서 제외 + * @Data -> @Getter후 Setter는 개별 지정해서 식별자 보호 + * @EqualsAndHashCode 지정으로 갹채 비교 최적화 + */ +@Entity +@Table(name = "rss", uniqueConstraints = @UniqueConstraint(columnNames = {"link"})) +@Getter +@NoArgsConstructor +@AllArgsConstructor +@Builder +@ToString(exclude = "description") +@EqualsAndHashCode(of = "id") +public class RssNews { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Setter + @Column(nullable = false) + private String title; + + @Column(nullable = false, unique = true) + private String link; + + @Setter + @Column(name = "pub_date") + private LocalDateTime pubDate; + + @Column 
+ private String category; + + @Column + private String guid; + + @Setter + @Column(columnDefinition = "TEXT") + private String description; + + @Column(name = "is_summary") + private boolean isSummary; + + @Column(name = "created_at") + private LocalDateTime createdAt; + + @PrePersist + protected void onCreate() { + createdAt = LocalDateTime.now(); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssErrorCode.java new file mode 100644 index 0000000..d790111 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssErrorCode.java @@ -0,0 +1,37 @@ +package com.likelion.backendplus4.talkpick.batch.rss.exception; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +/** + * RSS 관련 오류 코드를 정의하는 열거형 + * + * @author 양병학 + * @since 2025-05-10 + */ +@Getter +@RequiredArgsConstructor +public enum RssErrorCode { + + // 일반 오류 + UNKNOWN_ERROR("RSS-E001", "알 수 없는 오류가 발생했습니다."), + + // RSS 정보 로드 관련 오류 + FEED_CONNECTION_ERROR("RSS-E101", "RSS 피드 연결 중 오류가 발생했습니다."), + FEED_PARSING_ERROR("RSS-E102", "RSS 피드 파싱 중 오류가 발생했습니다."), + FEED_TIMEOUT_ERROR("RSS-E103", "RSS 피드 로드 중 시간 초과가 발생했습니다."), + + // Mapper 관련 오류 + MAPPER_NOT_FOUND("RSS-E201", "요청한 매퍼를 찾을 수 없습니다."), + ITEM_MAPPING_ERROR("RSS-E202", "RSS 항목 매핑 중 오류가 발생했습니다."), + + // 데이터베이스 관련 오류 + DB_SAVE_ERROR("RSS-E301", "RSS 뉴스를 저장하는 중 오류가 발생했습니다."), + DUPLICATE_LINK_ERROR("RSS-E302", "이미 존재하는 링크입니다."), + + // 스케줄러 관련 오류 + SCHEDULER_EXECUTION_ERROR("RSS-E401", "스케줄러 실행 중 오류가 발생했습니다."); + + private final String code; + private final String message; +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssException.java new file mode 100644 index 0000000..2885adc --- /dev/null +++ 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssException.java @@ -0,0 +1,62 @@ +package com.likelion.backendplus4.talkpick.batch.rss.exception; + +import lombok.Getter; + +/** + * RSS 예외처리 클래스 + * + * @author 양병학 + * @since 2025-05-10 + */ +@Getter +public class RssException extends RuntimeException { + + private final RssErrorCode errorCode; + + public RssException(RssErrorCode errorCode) { + super(errorCode.getMessage()); + this.errorCode = errorCode; + } + + /** + * 상세 메시지 생성자 + * + * @param errorCode 오류 코드 + * @param message 상세 메시지 + */ + public RssException(RssErrorCode errorCode, String message) { + super(message); + this.errorCode = errorCode; + } + + /** + * 원인 예외 생성자 + * + * @param errorCode 오류 코드 + * @param cause 원인 예외 + */ + public RssException(RssErrorCode errorCode, Throwable cause) { + super(errorCode.getMessage(), cause); + this.errorCode = errorCode; + } + + /** + * 상세 메시지및 원인 예외 생성자 + * + * @param errorCode 오류 코드 + * @param message 상세 메시지 + * @param cause 원인 예외 + */ + public RssException(RssErrorCode errorCode, String message, Throwable cause) { + super(message, cause); + this.errorCode = errorCode; + } + + /** + * 오류 코드와 메시지를 포함한 문자열 반환 + */ + @Override + public String toString() { + return String.format("[%s] %s", errorCode.getCode(), getMessage()); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/model/RssSource.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/model/RssSource.java new file mode 100644 index 0000000..73e0339 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/model/RssSource.java @@ -0,0 +1,84 @@ +package com.likelion.backendplus4.talkpick.batch.rss.model; + +import lombok.Getter; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** + * RSS 뉴스 소스와 URL을 정의하는 열거형 + * 각 항목은 언론사, 카테고리, URL 정보를 포함 + * + * @author 양병학 + * @since 2025-05-10 + */ +@Getter +public enum 
RssSource { + // 국민일보 RSS 피드 + KMIB_POLITICS("국민일보", "정치", "https://www.kmib.co.kr/rss/data/kmibPolRss.xml", "km", true), + KMIB_ECONOMY("국민일보", "경제", "https://www.kmib.co.kr/rss/data/kmibEcoRss.xml", "km", true), + KMIB_SOCIETY("국민일보", "사회", "https://www.kmib.co.kr/rss/data/kmibSocRss.xml", "km", true), + KMIB_INTERNATIONAL("국민일보", "국제", "https://www.kmib.co.kr/rss/data/kmibIntRss.xml", "km", true), + KMIB_ENTERTAINMENT("국민일보", "연예", "https://www.kmib.co.kr/rss/data/kmibEntRss.xml", "km", true), + KMIB_SPORTS("국민일보", "스포츠", "https://www.kmib.co.kr/rss/data/kmibSpoRss.xml", "km", true), + + // 동아일보 RSS 피드 - 현재 비활성화 + DONGA_TOTAL("동아일보", "전체", "https://rss.donga.com/total.xml", "da", false), + + // 경향신문 RSS 피드 - 현재 비활성화 + KHAN_TOTAL("경향신문", "전체", "https://www.khan.co.kr/rss/rssdata/total_news.xml", "kh", false); + + private final String publisherName; + private final String categoryName; + private final String url; + private final String mapperType; + private final boolean enabled; + + RssSource(String publisherName, String categoryName, String url, String mapperType, boolean enabled) { + this.publisherName = publisherName; + this.categoryName = categoryName; + this.url = url; + this.mapperType = mapperType; + this.enabled = enabled; + } + + /** + * 언론사 이름과 카테고리를 결합한 표시 이름 반환 + */ + public String getDisplayName() { + return publisherName + "-" + categoryName; + } + + /** + * 매퍼 타입 키 반환 + */ + public String getMapperType() { + return mapperType; + } + + /** + * 언론사 코드 접두사 반환 (대문자) + */ + public String getCodePrefix() { + return mapperType.toUpperCase(); + } + + /** + * 활성화된 모든 소스 반환 + */ + public static List getEnabledSources() { + return Arrays.stream(values()) + .filter(RssSource::isEnabled) + .collect(Collectors.toList()); + } + + /** + * 특정 언론사의 모든 소스 반환 + */ + public static List getSourcesByPublisher(String publisherName) { + return Arrays.stream(values()) + .filter(source -> source.getPublisherName().equals(publisherName)) + .collect(Collectors.toList()); + } 
+} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/repository/RssNewsRepository.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/repository/RssNewsRepository.java new file mode 100644 index 0000000..965740a --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/repository/RssNewsRepository.java @@ -0,0 +1,11 @@ +package com.likelion.backendplus4.talkpick.batch.rss.repository; + +import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.stereotype.Repository; + +@Repository +public interface RssNewsRepository extends JpaRepository { + + boolean existsByLink(String link); +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/scheduler/RssScheduler.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/scheduler/RssScheduler.java new file mode 100644 index 0000000..26c142c --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/scheduler/RssScheduler.java @@ -0,0 +1,55 @@ +package com.likelion.backendplus4.talkpick.batch.rss.scheduler; + +import com.likelion.backendplus4.talkpick.batch.rss.exception.RssErrorCode; +import com.likelion.backendplus4.talkpick.batch.rss.exception.RssException; +import com.likelion.backendplus4.talkpick.batch.rss.service.RssService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.scheduling.annotation.EnableScheduling; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Component; + +/** + * RSS 피드 수집 작업을 스케줄링하는 클래스 + * + * @author 양병학 + * @since 2025-05-10 최초 작성 + * @modify 2025-05-10 cron 표현식을 설정 파일로 분리하여 유연성 개선 + */ +@Component +@EnableScheduling +@RequiredArgsConstructor +@Slf4j +public class RssScheduler { + + private final RssService rssService; + + /** + * RSS 
피드를 정기적으로 수집하는 스케줄 작업 + * application.yml의 rss.scheduler.cron 속성으로 실행 주기 설정 + * 설정이 없을 경우 기본값으로 1분마다 실행 + * + * 1. 서비스를 거쳐서 피드 수집 + * 2. 처리된 항목 수 로깅 + * 3. 예외 발생 시 오류 로깅하고 다음 스케쥴까지 대기 + * + * @since 2025-05-10 최초 작성 + * @modify 2025-05-10 cron 표현식을 application으로 분리 + */ + @Scheduled(cron = "${rss.scheduler.cron:0 */1 * * * ?}") + public void scheduleRssFeedFetch() { + log.info("Quartz 정상시작"); + + try { + int processedItems = rssService.fetchAndProcessAllFeeds(); + log.info("Rss 피드 입력 Processed {} items", processedItems); + } catch (RssException e) { + // 커스텀 예외 처리 + log.error("[{}] 스케줄러 실행 오류: {}", e.getErrorCode().getCode(), e.getMessage(), e); + } catch (Exception e) { + // 기타 예외 처리 + log.error("[{}] 스케줄러 실행 중 예상치 못한 오류: {}", + RssErrorCode.SCHEDULER_EXECUTION_ERROR.getCode(), e.getMessage(), e); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssMappingFactory.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssMappingFactory.java new file mode 100644 index 0000000..94e1db9 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssMappingFactory.java @@ -0,0 +1,43 @@ +package com.likelion.backendplus4.talkpick.batch.rss.service; + +import com.likelion.backendplus4.talkpick.batch.rss.service.mapper.RssMapper; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@Component +public class RssMappingFactory { + + private final Map mappers = new HashMap<>(); + + /** + * 모든 RssMapper 구현체를 자동으로 주입받아 맵에 등록 + * + * @param availableMappers RssMapper 구현체 목록 + */ + @Autowired + public RssMappingFactory(List availableMappers) { + for (RssMapper mapper : availableMappers) { + String mapperType = mapper.getMapperType(); + mappers.put(mapperType, mapper); + } + } + + /** + * 타입에 맞게 mapper 반환 + * + * @param type 매퍼 
타입 (소문자 언론사 코드) + * @return 해당 타입의 RSS 매퍼 + * @throws IllegalArgumentException 지원하지 않는 타입인 경우 + */ + public RssMapper getMapper(String type) { + RssMapper mapper = mappers.get(type); + if (mapper == null) { + throw new IllegalArgumentException("Mapper 없음: " + type); + } + return mapper; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssService.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssService.java new file mode 100644 index 0000000..8e36940 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssService.java @@ -0,0 +1,154 @@ +package com.likelion.backendplus4.talkpick.batch.rss.service; + +import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; +import com.likelion.backendplus4.talkpick.batch.rss.exception.RssErrorCode; +import com.likelion.backendplus4.talkpick.batch.rss.exception.RssException; +import com.likelion.backendplus4.talkpick.batch.rss.model.RssSource; +import com.likelion.backendplus4.talkpick.batch.rss.repository.RssNewsRepository; +import com.likelion.backendplus4.talkpick.batch.rss.service.mapper.RssMapper; +import com.rometools.rome.feed.synd.SyndEntry; +import com.rometools.rome.feed.synd.SyndFeed; +import com.rometools.rome.io.SyndFeedInput; +import com.rometools.rome.io.XmlReader; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.dao.DataIntegrityViolationException; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +import static java.util.function.Predicate.not; + +/** + * RSS 피드를 수집하고 처리하는 서비스 클래스 + * + * @author 양병학 + * @since 2025-05-10 최초 작성 + */ +@Service +@RequiredArgsConstructor +@Slf4j +public class RssService { + + private 
final RssNewsRepository rssNewsRepository; + private final RssMappingFactory rssMappingFactory; + + /** + * 모든 활성화된 RSS 뉴스 피드를 가져와 처리 + * + * 1. 활성화된 모든 RSS 소스 조회 + * 2. 각 소스에서 RSS 피드 가져오기 + * 3. 수집된 모든 뉴스 항목 병합 + * 4. 데이터베이스에 저장 (중복 제외) + * + * @return 새로 저장된 뉴스 항목 수 + * @since 2025-05-10 최초 작성 + * @author 양병학 + */ + @Transactional + public int fetchAndProcessAllFeeds() { + List allNewsItems = new ArrayList<>(); + + RssSource.getEnabledSources().forEach(source -> { + try { + List items = fetchFeedFromSource(source); + allNewsItems.addAll(items); + } catch (Exception e) { + log.error("Error: 뉴스피드 fetch중 {}-{}: {}", + source.getPublisherName(), source.getCategoryName(), + e.getMessage(), e); + } + }); + + return saveItems(allNewsItems); + } + + /** + * 특정 소스에서 RSS 피드를 가져옴 + * + * @param source RSS 소스 정보 + * @return 변환된 RssNews 엔티티 목록 + * @throws Exception RSS 피드 로드 및 처리 중 발생할 수 있는 예외 + */ + private List fetchFeedFromSource(RssSource source) { + List newsItems = new ArrayList<>(); + try { + URL feedUrl = new URL(source.getUrl()); + SyndFeedInput input = new SyndFeedInput(); + SyndFeed feed; + + try { + feed = input.build(new XmlReader(feedUrl)); + } catch (Exception e) { + throw new RssException(RssErrorCode.FEED_PARSING_ERROR, + "피드 파싱 실패: " + source.getDisplayName(), e); + } + + RssMapper mapper; + try { + mapper = rssMappingFactory.getMapper(source.getMapperType()); + } catch (IllegalArgumentException e) { + throw new RssException(RssErrorCode.MAPPER_NOT_FOUND, + "매퍼 없음: " + source.getMapperType(), e); + } + + for (SyndEntry entry : feed.getEntries()) { + try { + RssNews newsItem = mapper.mapToRssNews(entry, source); + newsItems.add(newsItem); + } catch (Exception e) { + log.warn("[{}] 항목 변환 실패: {} - {}", + RssErrorCode.ITEM_MAPPING_ERROR.getCode(), + source.getDisplayName(), e.getMessage()); + // 한 항목 실패해도 계속 진행 + } + } + + log.info("페치완료 {} 갯수 뉴스피드 {}-{} 에서", + newsItems.size(), source.getPublisherName(), source.getCategoryName()); + } catch (RssException e) { + 
// 이미 래핑된 RssException은 그대로 로그 + log.error("[{}] 피드 로드 실패: {} - {}", + e.getErrorCode().getCode(), source.getDisplayName(), e.getMessage()); + } catch (Exception e) { + // 다른 예외는 RssException으로 래핑 + log.error("[{}] 피드 로드 실패: {} - {}", + RssErrorCode.FEED_CONNECTION_ERROR.getCode(), + source.getDisplayName(), e.getMessage(), e); + } + return newsItems; + } + + /** + * 수집된 뉴스 항목을 DB에 저장 + * + * @param newsItems 저장할 뉴스 항목 목록 + * @return 새로 저장된 항목 수 + */ + @Transactional + public int saveItems(List newsItems) { + int savedCount = 0; + + List newItems = newsItems.stream() + .filter(not(item -> rssNewsRepository.existsByLink(item.getLink()))) + .collect(Collectors.toList()); + + for (RssNews item : newItems) { + try { + rssNewsRepository.save(item); + savedCount++; + } catch (DataIntegrityViolationException e) { + log.debug("중복 항목 감지: {}", item.getLink()); + } + } + + log.info("{}개 뉴스 저장완료 (새로운 항목: {}, 총 가져온 항목: {})", + savedCount, newItems.size(), newsItems.size()); + return savedCount; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/DongaRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/DongaRssMapper.java new file mode 100644 index 0000000..588390a --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/DongaRssMapper.java @@ -0,0 +1,54 @@ +//package com.likelion.backendplus4.talkpick.batch.rss.service.mapper; +// +//import com.rometools.rome.feed.synd.SyndCategory; +//import com.rometools.rome.feed.synd.SyndEntry; +//import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; +//import org.springframework.stereotype.Component; +// +//import java.time.LocalDateTime; +//import java.time.ZoneId; +//import java.util.Date; +//import java.util.stream.Collectors; +// +//@Component +//public class DongaRssMapper implements RssMapper { +// +// /* +// SyndEntry는 Rome 라이브러리에서 RSS 항목 나타내는 객체 +// +// getTitle(): 제목 반환 +// 
getLink(): 링크 반환 +// getPublishedDate(): 발행일 반환 +// getDescription(): 설명(요약) 반환 +// getCategories(): 카테고리 목록 반환 +// getUri(): 항목의 고유 식별자(제공사 고유번호 2자리 + guid) 반환 +// (예시: KM0028082827 [KM: 국민일보]) +// 몇몇 피드들은 url에서 추출해서 사용 +// +// getForeignMarkup(): RSS 2.0 기본 태그 외의 확장 태그(Dublin Core 등) 접근 +// +// 필요하면 객체 수정해서 사용, CustomEntry +// */ +// +// @Override +// public RssNews mapToRssNews(SyndEntry entry) { +// // category +// String category = entry.getCategories().stream() +// .map(SyndCategory::getName) +// .collect(Collectors.joining(", ")); +// +// return RssNews.builder() +// .title(entry.getTitle()) +// .link(entry.getLink()) +// .pubDate(convertToLocalDateTime(entry.getPublishedDate())) +// .category(category) +// .guid(entry.getUri()) // URI를 GUID로 사용 +// .build(); +// } +// +// private LocalDateTime convertToLocalDateTime(Date date) { +// return date != null +// ? date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime() +// : LocalDateTime.now(); +// } +//} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KhanRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KhanRssMapper.java new file mode 100644 index 0000000..21891e6 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KhanRssMapper.java @@ -0,0 +1,55 @@ +//package com.likelion.backendplus4.talkpick.batch.rss.service.mapper; +// +//import com.rometools.rome.feed.synd.SyndCategory; +//import com.rometools.rome.feed.synd.SyndEntry; +//import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; +//import org.springframework.stereotype.Component; +// +//import java.time.LocalDateTime; +//import java.time.ZoneId; +//import java.util.Date; +//import java.util.stream.Collectors; +// +//@Component +//public class KhanRssMapper implements RssMapper { +// +// @Override +// public RssNews mapToRssNews(SyndEntry entry) { +// // 경향신문 dc:date를 사용 +// 
LocalDateTime pubDate; +// if (entry.getPublishedDate() != null) { +// pubDate = convertToLocalDateTime(entry.getPublishedDate()); +// } else { +// pubDate = entry.getForeignMarkup().stream() +// .filter(element -> "date".equals(element.getName()) && +// "dc".equals(element.getNamespacePrefix())) +// .findFirst() +// .map(element -> { +// try { +// return LocalDateTime.parse(element.getValue()); +// } catch (Exception e) { +// return LocalDateTime.now(); +// } +// }) +// .orElse(LocalDateTime.now()); +// } +// +// String category = entry.getCategories().stream() +// .map(SyndCategory::getName) +// .collect(Collectors.joining(", ")); +// +// return RssNews.builder() +// .title(entry.getTitle()) +// .link(entry.getLink()) +// .pubDate(pubDate) +// .category(category) +// .guid(entry.getUri()) +// .build(); +// } +// +// private LocalDateTime convertToLocalDateTime(Date date) { +// return date != null +// ? date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime() +// : LocalDateTime.now(); +// } +//} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KmibRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KmibRssMapper.java new file mode 100644 index 0000000..8848ea3 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KmibRssMapper.java @@ -0,0 +1,62 @@ +package com.likelion.backendplus4.talkpick.batch.rss.service.mapper; + +import com.likelion.backendplus4.talkpick.batch.rss.model.RssSource; +import com.rometools.rome.feed.synd.SyndCategory; +import com.rometools.rome.feed.synd.SyndEntry; +import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; +import org.springframework.stereotype.Component; + +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.util.Date; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +@Component +public 
class KmibRssMapper implements RssMapper { + + private static final Pattern ARCID_PATTERN = Pattern.compile("arcid=([0-9]+)"); + + @Override + public RssNews mapToRssNews(SyndEntry entry, RssSource source) { + String arcId = extractArcIdFromLink(entry.getLink()); + String guid = source.getCodePrefix() + arcId; + + String description = ""; + if (entry.getDescription() != null) { + description = entry.getDescription().getValue(); + } + + return RssNews.builder() + .title(entry.getTitle()) + .link(entry.getLink()) + .pubDate(convertToLocalDateTime(entry.getPublishedDate())) + .category(source.getCategoryName()) // Enum에서 직접 카테고리 이름 가져옴 + .guid(guid) + .description(description) + .isSummary(false) + .build(); + } + + @Override + public String getMapperType() { + return "km"; + } + + private String extractArcIdFromLink(String link) { + if (link == null) return ""; + + Matcher matcher = ARCID_PATTERN.matcher(link); + if (matcher.find()) { + return matcher.group(1); + } + return link; + } + + private LocalDateTime convertToLocalDateTime(Date date) { + return null != date + ? 
date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime() + : LocalDateTime.now(); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/RssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/RssMapper.java new file mode 100644 index 0000000..5c5f871 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/RssMapper.java @@ -0,0 +1,29 @@ +package com.likelion.backendplus4.talkpick.batch.rss.service.mapper; + +import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; +import com.likelion.backendplus4.talkpick.batch.rss.model.RssSource; +import com.rometools.rome.feed.synd.SyndEntry; + +/** + * RSS 항목을 RssNews 엔티티로 변환하는 매퍼 인터페이스 + * + * @author 양병학 + * @since 2025-05-10 최초 작성 + */ +public interface RssMapper { + /** + * RSS 피드 항목을 RssNews 엔티티로 변환합니다. + * + * @param entry 변환할 SyndEntry 객체(rss2.0 구조 지원) + * @param source RSS 소스 정보 + * @return 변환된 RssNews 엔티티 + */ + RssNews mapToRssNews(SyndEntry entry, RssSource source); + + /** + * 매퍼 타입을 반환 + * + * @return 매퍼 타입 (소문자 코드) + */ + String getMapperType(); +} \ No newline at end of file From f197827ee500787ae4b7570e60e8ee4af1081542 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sat, 10 May 2025 22:09:08 +0900 Subject: [PATCH 06/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20=ED=8C=A8?= =?UTF-8?q?=ED=82=A4=EC=A7=80=20=EC=98=A4=ED=83=80=20=EC=88=98=EC=A0=95=20?= =?UTF-8?q?(#41)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * com.likelion.backendplus4.talkpick.batch.common.configuraition.p6spy 패키지명 오타 수정 * configuraition -> configuration --- .../{configuraition => configuration}/p6spy/P6spyConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/main/java/com/likelion/backendplus4/talkpick/batch/common/{configuraition => configuration}/p6spy/P6spyConfig.java (99%) diff --git 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuraition/p6spy/P6spyConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/p6spy/P6spyConfig.java similarity index 99% rename from src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuraition/p6spy/P6spyConfig.java rename to src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/p6spy/P6spyConfig.java index 04d7190..0e82519 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuraition/p6spy/P6spyConfig.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/p6spy/P6spyConfig.java @@ -1,4 +1,4 @@ -package com.likelion.backendplus4.talkpick.batch.common.configuraition.p6spy; +package com.likelion.backendplus4.talkpick.batch.common.configuration.p6spy; import com.p6spy.engine.logging.Category; import com.p6spy.engine.spy.P6SpyOptions; From f757eff5969a0df50eb67bfe85342daa99c19c66 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sun, 11 May 2025 18:36:44 +0900 Subject: [PATCH 07/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20=EB=B0=B0?= =?UTF-8?q?=ED=8F=AC=20=EC=95=A1=EC=85=98=20=EC=B6=94=EA=B0=80=20(#48)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📦 Chore: 배포 액션 추가 * 📦 Chore: docker-compose 경로 수정 * 📦 Chore: 테스트용 수정 파일 첨부 --- .github/workflows/batch-dev-deploy.yml | 35 ++++++++++++++++++++++++++ src/test/test.txt | 0 2 files changed, 35 insertions(+) create mode 100644 .github/workflows/batch-dev-deploy.yml create mode 100644 src/test/test.txt diff --git a/.github/workflows/batch-dev-deploy.yml b/.github/workflows/batch-dev-deploy.yml new file mode 100644 index 0000000..30bbe3e --- /dev/null +++ b/.github/workflows/batch-dev-deploy.yml @@ -0,0 +1,35 @@ +name: talkpick-batch-dev-deploy + +on: + push: + branches: + - dev + paths-ignore: + - '.github/**' + +jobs: + deploy: + runs-on: self-hosted + + steps: + - name: Checkout code + 
uses: actions/checkout@v3 + + - name: Set up Java + uses: actions/setup-java@v3 + with: + distribution: 'temurin' + java-version: '21' + + - name: Make gradlew executable + run: chmod +x ./gradlew + + - name: Build Spring Boot (JAR) + run: ./gradlew bootJar -Penv=test --build-dir /deploy/batch/build + + - name: Restart Docker + run: | + cd /deploy + docker-compose stop batch + docker-compose rm -f batch + docker-compose up -d --build batch \ No newline at end of file diff --git a/src/test/test.txt b/src/test/test.txt new file mode 100644 index 0000000..e69de29 From 7f9145adab3e56428ca63d16d9d381b0016b5a5d Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sun, 11 May 2025 18:54:50 +0900 Subject: [PATCH 08/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20#47=20GitHub=20ac?= =?UTF-8?q?tion=20bulid=20path=20=EB=AA=85=EB=A0=B9=EC=96=B4=20=EC=98=A4?= =?UTF-8?q?=EB=A5=98=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📦 Chore: 배포 액션 추가 * 📦 Chore: docker-compose 경로 수정 * 📦 Chore: 테스트용 수정 파일 첨부 * 📦 Chore: Git-Action bulid path 명령어 오류 수정 --- .github/workflows/batch-dev-deploy.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/batch-dev-deploy.yml b/.github/workflows/batch-dev-deploy.yml index 30bbe3e..9aaa544 100644 --- a/.github/workflows/batch-dev-deploy.yml +++ b/.github/workflows/batch-dev-deploy.yml @@ -20,12 +20,15 @@ jobs: with: distribution: 'temurin' java-version: '21' + - name: Set custom build directory dynamically + run: | + echo "buildDir=/deploy/${{ github.event.repository.name }}/build" >> gradle.properties - name: Make gradlew executable run: chmod +x ./gradlew - name: Build Spring Boot (JAR) - run: ./gradlew bootJar -Penv=test --build-dir /deploy/batch/build + run: ./gradlew bootJar - name: Restart Docker run: | From 62b69bffee66bc7ff62bb5c8bf791100858e6927 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sun, 11 May 2025 18:57:49 +0900 Subject: [PATCH 09/36] 
=?UTF-8?q?=F0=9F=93=A6=20Chore:=20=ED=85=8C?= =?UTF-8?q?=EC=8A=A4=ED=8A=B8=20=ED=8C=8C=EC=9D=BC=20=EC=82=AD=EC=A0=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📦 Chore: 배포 액션 추가 * 📦 Chore: docker-compose 경로 수정 * 📦 Chore: 테스트용 수정 파일 첨부 * 📦 Chore: Git-Action bulid path 명령어 오류 수정 * 📦 Chore: 테스트 파일 삭제 --- src/test/test.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/test/test.txt diff --git a/src/test/test.txt b/src/test/test.txt deleted file mode 100644 index e69de29..0000000 From d8d60a2ce840d170fa7db7e636dac489ee077920 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sun, 11 May 2025 19:08:18 +0900 Subject: [PATCH 10/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20=EB=B0=B0?= =?UTF-8?q?=ED=8F=AC=20=EC=98=A4=EB=A5=98=20=EC=88=98=EC=A0=95=20-=20jar?= =?UTF-8?q?=ED=8C=8C=EC=9D=BC=20=EB=B3=B5=EC=82=AC=20=EC=B6=94=EA=B0=80=20?= =?UTF-8?q?(#51)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📦 Chore: github action - jar 파일 복사 추가 --- .github/workflows/batch-dev-deploy.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/batch-dev-deploy.yml b/.github/workflows/batch-dev-deploy.yml index 9aaa544..a315a21 100644 --- a/.github/workflows/batch-dev-deploy.yml +++ b/.github/workflows/batch-dev-deploy.yml @@ -22,7 +22,7 @@ jobs: java-version: '21' - name: Set custom build directory dynamically run: | - echo "buildDir=/deploy/${{ github.event.repository.name }}/build" >> gradle.properties + echo "buildDir=/deploy/${{ github.event.repository.name }}/build/gradle" >> gradle.properties - name: Make gradlew executable run: chmod +x ./gradlew @@ -30,6 +30,9 @@ jobs: - name: Build Spring Boot (JAR) run: ./gradlew bootJar + - name: Copy JAR to shared volume + run: cp /deploy/${{ github.event.repository.name }}/build/gradle/libs/*.jar /deploy/{{ github.event.repository.name }}/build/app.jar + - name: Restart Docker run: | cd 
/deploy From 69b1d6d55bb5f167715b213a8aa01665aa4d113a Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sun, 11 May 2025 19:13:18 +0900 Subject: [PATCH 11/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20Action=20?= =?UTF-8?q?=EC=88=98=EB=8F=99=20=EC=8B=A4=ED=96=89=20=EA=B0=80=EB=8A=A5?= =?UTF-8?q?=ED=95=98=EB=8F=84=EB=A1=9D=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/batch-dev-deploy.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/batch-dev-deploy.yml b/.github/workflows/batch-dev-deploy.yml index a315a21..e38f4c1 100644 --- a/.github/workflows/batch-dev-deploy.yml +++ b/.github/workflows/batch-dev-deploy.yml @@ -6,6 +6,7 @@ on: - dev paths-ignore: - '.github/**' + workflow_dispatch: jobs: deploy: @@ -38,4 +39,4 @@ jobs: cd /deploy docker-compose stop batch docker-compose rm -f batch - docker-compose up -d --build batch \ No newline at end of file + docker-compose up -d --build batch From adc92556dcaec661bd89d237beb80ee2b26ec58c Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sun, 11 May 2025 19:36:11 +0900 Subject: [PATCH 12/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20github=20action?= =?UTF-8?q?=20=EA=B2=BD=EB=A1=9C=20=EC=84=A4=EC=A0=95=20=EB=AC=B8=EC=A0=9C?= =?UTF-8?q?=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/batch-dev-deploy.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/batch-dev-deploy.yml b/.github/workflows/batch-dev-deploy.yml index e38f4c1..8bb496d 100644 --- a/.github/workflows/batch-dev-deploy.yml +++ b/.github/workflows/batch-dev-deploy.yml @@ -30,10 +30,11 @@ jobs: - name: Build Spring Boot (JAR) run: ./gradlew bootJar - - name: Copy JAR to shared volume - run: cp /deploy/${{ github.event.repository.name }}/build/gradle/libs/*.jar /deploy/{{ github.event.repository.name }}/build/app.jar - + 
run: | + cp /deploy/${{ github.event.repository.name }}/build/gradle/libs/*.jar \ + /deploy/${{ github.event.repository.name }}/build/app.jar + - name: Restart Docker run: | cd /deploy From 7552af850d5a21075259ef96f6af157d7b726b74 Mon Sep 17 00:00:00 2001 From: JUNG ANSIK Date: Sun, 11 May 2025 19:51:29 +0900 Subject: [PATCH 13/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#34=20open=20ai=20?= =?UTF-8?q?=ED=99=98=EA=B2=BD=EC=84=A4=EC=A0=95=20=EC=B6=94=EA=B0=80=20(#4?= =?UTF-8?q?4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ Feature/#34-OpenAI-환경설정-추가 * ✨ feat: EmbeddingPort와 구현체인 OpenAIEmbeddingAdapter를 추가하였습니다. * ♻️ Refactor: OpenAIEmbeddingAdapter에서 private메서드로 리팩토링을 진행하였습니다., EmbeddingErrorCode 및 EmbeddingException을 추가하였습니다. * 📝 Docs: Embedding기능 전반에 걸쳐 주석을 추가하였습니다. * ♻️ Refactor: OpenAIEmbeddingAdapter에서 모든 필드의 의존성 주입을 생성자 주입으로 변경하였습니다. --- .../configuration/openai/OpenaiConfig.java | 29 ++++++ .../batch/embedding/EmbeddingPort.java | 13 +++ .../embedding/OpenAIEmbeddingAdapter.java | 93 +++++++++++++++++++ .../exception/EmbeddingException.java | 23 +++++ .../exception/error/EmbeddingErrorCode.java | 57 ++++++++++++ src/main/resources/application.yml | 8 +- 6 files changed, 219 insertions(+), 4 deletions(-) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/EmbeddingPort.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/OpenAIEmbeddingAdapter.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/EmbeddingException.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/error/EmbeddingErrorCode.java diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java new file mode 100644 index 0000000..d65b0f1 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java @@ -0,0 +1,29 @@ +package com.likelion.backendplus4.talkpick.batch.common.configuration.openai; + +import org.springframework.ai.openai.api.OpenAiApi; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * OpenAI API 클라이언트 빈을 생성하는 구성 클래스 + * + * @since 2025-05-11 + */ +@Configuration +public class OpenaiConfig { + @Value("${spring.ai.openai.api-key}") + private String apiKey; + + /** + * OpenAiApi 빈을 생성한다. + * + * @return OpenAI API 클라이언트 인스턴스 + * @author 정안식 + * @since 2025-05-11 + */ + @Bean + public OpenAiApi openaiApi() { + return new OpenAiApi(apiKey); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/EmbeddingPort.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/EmbeddingPort.java new file mode 100644 index 0000000..a91aabc --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/EmbeddingPort.java @@ -0,0 +1,13 @@ +package com.likelion.backendplus4.talkpick.batch.embedding; + +public interface EmbeddingPort { + + /** + * 주어진 텍스트에 대한 임베딩 벡터를 반환한다. 
+ * + * @param text 입력 텍스트 + * @return 텍스트 임베딩 벡터 배열 + * @since 2025-05-11 + */ + float[] getEmbedding(String text); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/OpenAIEmbeddingAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/OpenAIEmbeddingAdapter.java new file mode 100644 index 0000000..7ce7aff --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/OpenAIEmbeddingAdapter.java @@ -0,0 +1,93 @@ +package com.likelion.backendplus4.talkpick.batch.embedding; + +import java.util.List; + +import org.springframework.ai.document.MetadataMode; +import org.springframework.ai.embedding.EmbeddingResponse; +import org.springframework.ai.openai.OpenAiEmbeddingModel; +import org.springframework.ai.openai.OpenAiEmbeddingOptions; +import org.springframework.ai.openai.api.OpenAiApi; +import org.springframework.ai.retry.RetryUtils; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogMethodValues; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; +import com.likelion.backendplus4.talkpick.batch.embedding.exception.EmbeddingException; +import com.likelion.backendplus4.talkpick.batch.embedding.exception.error.EmbeddingErrorCode; + +/** + * OpenAI API를 사용하여 텍스트 임베딩을 생성하는 어댑터 구현체 + * + * @since 2025-05-11 + */ +@Component +public class OpenAIEmbeddingAdapter implements EmbeddingPort { + private final OpenAiApi openAiApi; + private final String embeddingModelName; + + public OpenAIEmbeddingAdapter(OpenAiApi openAiApi, + @Value("${spring.ai.openai.embedding-model}") String embeddingModelName) { + this.openAiApi = openAiApi; + this.embeddingModelName = embeddingModelName; + } + + /** + * 주어진 텍스트에 대한 임베딩 벡터를 반환한다. 
+ * + * @param text 입력 텍스트 + * @return 텍스트 임베딩 벡터 배열 + * @since 2025-05-11 + */ + @EntryExitLog + @LogMethodValues + @TimeTracker + @Override + public float[] getEmbedding(String text) { + OpenAiEmbeddingModel model = createModel(); + return executeEmbedding(model, text); + } + + /** + * OpenAI 임베딩 모델 인스턴스를 생성한다. + * + * @return 생성된 OpenAiEmbeddingModel 객체 + * @throws EmbeddingException 모델 생성 중 오류 발생 시 + * @author 정안식 + * @since 2025-05-11 + */ + private OpenAiEmbeddingModel createModel() { + try { + return new OpenAiEmbeddingModel( + openAiApi, + MetadataMode.EMBED, + OpenAiEmbeddingOptions.builder() + .model(embeddingModelName) + .build(), + RetryUtils.DEFAULT_RETRY_TEMPLATE + ); + } catch (Exception e) { + throw new EmbeddingException(EmbeddingErrorCode.MODEL_CREATION_ERROR, e); + } + } + + /** + * 주어진 모델을 사용하여 텍스트 임베딩을 계산한다. + * + * @param model OpenAiEmbeddingModel 인스턴스 + * @param text 입력 텍스트 + * @return 계산된 임베딩 벡터 배열 + * @throws EmbeddingException API 호출 중 오류 발생 시 + * @author 정안식 + * @since 2025-05-11 + */ + private float[] executeEmbedding(OpenAiEmbeddingModel model, String text) { + try { + EmbeddingResponse response = model.embedForResponse(List.of(text)); + return response.getResults().getFirst().getOutput(); + } catch (Exception e) { + throw new EmbeddingException(EmbeddingErrorCode.API_CALL_ERROR, e); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/EmbeddingException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/EmbeddingException.java new file mode 100644 index 0000000..0160dcc --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/EmbeddingException.java @@ -0,0 +1,23 @@ +package com.likelion.backendplus4.talkpick.batch.embedding.exception; + +import com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +public class 
EmbeddingException extends CustomException { + private final ErrorCode errorCode; + + public EmbeddingException(ErrorCode errorCode) { + super(errorCode); + this.errorCode = errorCode; + } + + public EmbeddingException(ErrorCode errorCode, Throwable cause) { + super(errorCode, cause); + this.errorCode = errorCode; + } + + @Override + public ErrorCode getErrorCode() { + return errorCode; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/error/EmbeddingErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/error/EmbeddingErrorCode.java new file mode 100644 index 0000000..55774fa --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/error/EmbeddingErrorCode.java @@ -0,0 +1,57 @@ +package com.likelion.backendplus4.talkpick.batch.embedding.exception.error; + +import org.springframework.http.HttpStatus; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +import lombok.RequiredArgsConstructor; + +/** + * 에러 코드 인터페이스 각 에러 항목에 대한 HTTP 상태, 에러 번호, 메시지를 제공한다. 
+ * A[BB][CCC] + * A (1자리) : 에러 심각도 (1~5) + * 1: 클라이언트 오류 + * 2: 인증 관련 오류 + * 3: 사용자 관련 오류 + * 4: 서버 오류 + * 5: 시스템 오류 + * + * BB (2자리) : 도메인 코드 + * 10: 사용자 관련 (ex: USER_NOT_FOUND) + * 20: 인증 관련 (ex: AUTHORIZATION_FAILED) + * 30: DB 관련 오류 (ex: DB_CONNECTION_FAILED) + * 40: API 관련 오류 (ex: API_TIMEOUT) + * 50: 시스템 오류 (ex: INTERNAL_SERVER_ERROR) + * + * CCC (3자리) : 세부 오류 순번 + * 001: 첫 번째 오류 + * 002: 두 번째 오류 + * 003: 세 번째 오류, 등등 + * + * @modified 2025-05-09 + * @since 2025-05-09 + */ +@RequiredArgsConstructor +public enum EmbeddingErrorCode implements ErrorCode { + MODEL_CREATION_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 440001, "임베딩 모델 생성 실패"), + API_CALL_ERROR(HttpStatus.SERVICE_UNAVAILABLE, 440002, "임베딩 API 호출 실패"); + + private final HttpStatus status; + private final int code; + private final String message; + + @Override + public HttpStatus httpStatus() { + return status; + } + + @Override + public int codeNumber() { + return code; + } + + @Override + public String message() { + return message; + } +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index b05181c..4c91452 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -5,6 +5,10 @@ server: enabled: false spring: + ai: + openai: + api-key: ${OPENAI_API_KEY} + embedding-model: text-embedding-3-small application: name: talkpick-batch datasource: @@ -30,10 +34,6 @@ log: web: resources: add-mappings: false -logging: - pattern: - file: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%level] [%thread] [%logger{36}] - %msg%n" - console: "%cyan(%d{yyyy-MM-dd HH:mm:ss.SSS}) %highlight(%-5level) %yellow([%thread]) %green(%logger{36}) - %msg%n" decorator: datasource: p6spy: From 09503de9a7b5061a000be0a5523cab967f2d389c Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Mon, 12 May 2025 22:44:02 +0900 Subject: [PATCH 14/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#37=20=EC=8A=A4?= =?UTF-8?q?=ED=94=84=EB=A7=81=20=EB=B0=B0=EC=B9=98=20=EC=B6=94=EA=B0=80=20?= =?UTF-8?q?(#46)?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📦 Chore: RSS 관련 클래스 패키지 이동 (헥사고날 적용) * 📦 Chore: application.yml 파일 수정 ### 삭제 * 스프링 기본 log 설정 yml 내용 (logging) ### 이동 * spring-web에 있어야 할 add-mappings가 병합 과정에서 다른 곳으로 이동 되어 위치 이동 * 📦 Chore: 패키지 이동 (헥사고날) * news 라는 메인 도메인 아래 - rss 라는 서브도메인이 있다고 생각하여 패키지 수정 * ✨ Feature: 뉴스 수집을 Spring Batch 및 Quartz로 전환 * ♻️ Refactor: Quartz 및 Batch 설정 리팩토링 * ♻️ Refactor: Quartz 트리거 설정 누락 추가 * 🐛 Fix: 리팩토링 과정에서 트리거 작동 안하는 문제 수정 * 📦️ Chore: 주석 처리 및 예외 처리 추가 * ♻️ Refactor: Collector Adapter 예외 처리 추가 * 📦️ Chore: UseCase 주석 추가 * 📦️ Chore: Port 주석 추가 * 📦️ 클래스 이름 변경 및 주석 추가 * 📦️ RespBuilder 주석 추가 * 📦️ Article Collector Service 주석 추가 * 📦️ 예외 처리 클래스 주석 수정 * 📦️ 주석 추가 * 📦️ 개발 환경에서 바꾼 옵션 제거 * 📦️ 코드 컨벤션 문자 포맷팅 (IDE 기능) * ♻️ Refactor: 기존 Mapper 클래스 추상클래스로 리팩토링 * ♻️ Refactor: Abstract내의 description 공백처리를 엔티티 단계에 위임 * 🐛 Fix: application 오타수정 * ♻️ Refactor: 리뷰 내용 일부 반영 * ♻️ Refactor: buildArticleEntityList 메소드 for문 람다로 수정 * ♻️ Refactor: 새로 저장된 뉴스가 없어도 로그 기록하도록 수정 * ♻️ Refactor: 불필요한 static 삭제 * ♻️ Refactor: 계산 연산 메소드 분리 * 🐛 Fix: 카테고리 호출 에러 수정 및 NewsCategory Enum에 카테고리 정리 * ✨ Feature: Step 예외 발생시 retry 추가 * 🐛 Fix: processor 오타수정 * 🐛 Fix: URL connection timeout 설정 추가 * 📦 Chore: 배치 및 쿼츠 DB 설정 변경, p6spy 기본 설정 변경 * 배치, 쿼츠 DB 자동 생성, 초기화 안되도록 수정 * 배치, 쿼츠 기본 DB 생성 DDL SQL 파일 추가 * p6spy 로그 기록 끔 (배치 로그 가독성 개선) --------- Co-authored-by: Atriel1999 --- application.yml | 41 ---- build.gradle | 1 - .../port/in/ArticleCollectorUseCase.java | 26 +++ .../application/port/out/CollectorPort.java | 33 ++++ .../service/ArticleCollectorService.java | 65 +++++++ .../dto/ArticleCollectorStatusResponse.java | 26 +++ .../exception/ArticleCollectorException.java | 35 ++++ .../BatchJobExceptionTranslator.java | 37 ++++ .../error/ArticleCollectorErrorCode.java | 64 +++++++ .../collector/adapter/CollectorAdapter.java | 103 ++++++++++ .../collector/config/batch/JobConfig.java | 45 +++++ .../collector/config/batch/NewsCategory.java | 27 +++ 
.../collector/config/batch/RssSource.java | 117 ++++++++++++ .../collector/config/batch/StepConfig.java | 104 ++++++++++ .../config/executor/BatchJobExecutor.java | 73 +++++++ .../config/executor/TaskExecutorConfig.java | 49 +++++ .../config/quartz/QuartzJobConfig.java | 47 +++++ .../config/quartz/QuartzTriggerConfig.java | 49 +++++ .../processor/RssEntryProcessor.java | 101 ++++++++++ .../collector/processor/RssFeedReader.java | 96 ++++++++++ .../collector/reader/ArticleReader.java | 51 +++++ .../reader/RssSourcePartitioner.java | 131 +++++++++++++ .../support/mapper/AbstractRssMapper.java | 140 ++++++++++++++ .../mapper/factory/RssMappingFactory.java | 50 +++++ .../mapper/implement/DongaRssMapper.java | 81 ++++++++ .../mapper/implement/KhanRssMapper.java | 128 +++++++++++++ .../mapper/implement/KmibRssMapper.java | 64 +++++++ .../collector/writer/ArticleWriter.java | 68 +++++++ .../jpa/entity/ArticleEntity.java} | 11 +- .../jpa/repository/RssNewsRepository.java | 12 ++ .../ArticleCollectorController.java | 51 +++++ .../batch/rss/exception/RssErrorCode.java | 37 ---- .../batch/rss/exception/RssException.java | 62 ------ .../talkpick/batch/rss/model/RssSource.java | 84 -------- .../rss/repository/RssNewsRepository.java | 11 -- .../batch/rss/scheduler/RssScheduler.java | 55 ------ .../batch/rss/service/RssMappingFactory.java | 43 ----- .../batch/rss/service/RssService.java | 154 --------------- .../rss/service/mapper/DongaRssMapper.java | 54 ------ .../rss/service/mapper/KhanRssMapper.java | 55 ------ .../rss/service/mapper/KmibRssMapper.java | 62 ------ .../batch/rss/service/mapper/RssMapper.java | 29 --- src/main/resources/application.yml | 24 ++- src/main/resources/batch/schema-mysql.sql | 98 ++++++++++ .../resources/quartz/tables_mysql_innodb.sql | 179 ++++++++++++++++++ 45 files changed, 2177 insertions(+), 696 deletions(-) delete mode 100644 application.yml create mode 100644 
src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/in/ArticleCollectorUseCase.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/out/CollectorPort.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/ArticleCollectorService.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/dto/ArticleCollectorStatusResponse.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/ArticleCollectorException.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/BatchJobExceptionTranslator.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/adapter/CollectorAdapter.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/JobConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/NewsCategory.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/StepConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/TaskExecutorConfig.java create mode 100644 
src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzJobConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/ArticleReader.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/factory/RssMappingFactory.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java rename src/main/java/com/likelion/backendplus4/talkpick/batch/{rss/entity/RssNews.java => news/article/infrastructure/jpa/entity/ArticleEntity.java} (80%) create mode 100644 
src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/presentation/controller/ArticleCollectorController.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssErrorCode.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssException.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/model/RssSource.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/repository/RssNewsRepository.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/scheduler/RssScheduler.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssMappingFactory.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssService.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/DongaRssMapper.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KhanRssMapper.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KmibRssMapper.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/RssMapper.java create mode 100644 src/main/resources/batch/schema-mysql.sql create mode 100644 src/main/resources/quartz/tables_mysql_innodb.sql diff --git a/application.yml b/application.yml deleted file mode 100644 index 0fc5aec..0000000 --- a/application.yml +++ /dev/null @@ -1,41 +0,0 @@ -spring: - datasource: - url: ${MYSQL_URL} - username: ${MYSQL_USERNAME} - password: ${MYSQL_PASSWORD} - driver-class-name: com.mysql.cj.jdbc.Driver - hikari: - maximum-pool-size: 20 - minimum-idle: 5 - idle-timeout: 30000 - connection-timeout: 20000 - - jpa: - hibernate: 
- ddl-auto: update - properties: - hibernate: - dialect: org.hibernate.dialect.MySQL8Dialect - format_sql: true - show-sql: true - ai: - openai: - api-key: ${OPENAI_API_KEY} - -server: - port: ${WEB_PORT:8080} - -logging: - level: - org: - hibernate: - SQL: DEBUG - type: - descriptor: - sql: - BasicBinder: TRACE - -rss: - scheduler: - cron: "0 */1 * * * ?" - diff --git a/build.gradle b/build.gradle index a921de4..77c6217 100644 --- a/build.gradle +++ b/build.gradle @@ -27,7 +27,6 @@ dependencies { // Spring-boot implementation 'org.springframework.boot:spring-boot-starter-web' - implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-xml' implementation 'me.paulschwarz:spring-dotenv:3.0.0' diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/in/ArticleCollectorUseCase.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/in/ArticleCollectorUseCase.java new file mode 100644 index 0000000..9e0643b --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/in/ArticleCollectorUseCase.java @@ -0,0 +1,26 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.application.port.in; + +import com.likelion.backendplus4.talkpick.batch.news.article.application.service.dto.ArticleCollectorStatusResponse; + +/** + * 뉴스 RSS 수집 실행/정지를 위한 유스케이스 인터페이스. + * RSS 수집기를 제어하고 현재 상태 정보를 반환한다. + * + * @since 2025-05-10 + */ +public interface ArticleCollectorUseCase { + + /** + * 수집 스케줄을 시작한다. + * + * @return 수집기의 상태 정보를 담은 응답 객체 + */ + ArticleCollectorStatusResponse start(); + + /** + * 뉴스 스케줄을 중단한다. 
+ * + * @return 수집기의 상태 정보를 담은 응답 객체 + */ + ArticleCollectorStatusResponse stop(); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/out/CollectorPort.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/out/CollectorPort.java new file mode 100644 index 0000000..d0363e2 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/out/CollectorPort.java @@ -0,0 +1,33 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.application.port.out; + +/** + * 뉴스 수집 스케줄 제어를 위한 외부 시스템 연동 포트 인터페이스.

+ * Quartz Scheduler 등의 외부 실행 환경을 시작/정지하거나 상태를 확인할 수 있도록 정의한다.

+ * + * 이 포트는 Adapter를 통해 실제 구현되며,

+ * 유스케이스 계층에서는 이 인터페이스만 의존한다. + * + * @since 2025-05-10 + */ +public interface CollectorPort { + /** + * 스케줄 실행을 요청한다. + * + * @return 실행 요청이 성공하고 실제로 실행 중이면 true + */ + boolean start(); + + /** + * 스케줄 정지를 요청한다. + * + * @return 정지 요청이 성공하면 true + */ + boolean stop(); + + /** + * 현재 실행 중인지 확인한다. + * + * @return 실행 중이면 true, 아니면 false + */ + boolean isRunning(); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/ArticleCollectorService.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/ArticleCollectorService.java new file mode 100644 index 0000000..b700025 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/ArticleCollectorService.java @@ -0,0 +1,65 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.application.service; + +import org.springframework.stereotype.Service; + +import com.likelion.backendplus4.talkpick.batch.news.article.application.port.in.ArticleCollectorUseCase; +import com.likelion.backendplus4.talkpick.batch.news.article.application.port.out.CollectorPort; +import com.likelion.backendplus4.talkpick.batch.news.article.application.service.dto.ArticleCollectorStatusResponse; + +import lombok.RequiredArgsConstructor; + +/** + * 뉴스 기사 수집 스케줄러를 제어하는 유스케이스 구현체. + * 수집기 실행 및 중단 요청을 처리하고, 그 결과를 상태 응답으로 반환한다. + * + * 내부적으로 {@link CollectorPort}를 호출하여 Quartz Scheduler 상태를 제어하며, + * 실행 결과에 따라 성공/실패 메시지를 포함한 {@link ArticleCollectorStatusResponse}를 생성한다. + * + * @author 함예정 + * @since 2025-05-11 + */ +@Service +@RequiredArgsConstructor +public class ArticleCollectorService implements ArticleCollectorUseCase { + private final CollectorPort collectorPort; + + /** + * 수집기를 실행한다. + * 실행 성공 여부에 따라 상태 응답을 생성한다. 
+ * + * @return 실행 결과에 대한 상태 응답 + * @author 함예정 + * @since 2025-05-11 + */ + @Override + public ArticleCollectorStatusResponse start() { + boolean result = collectorPort.start(); + return getCollectorStatusResponse(result); + } + + /** + * 수집기를 정지한다. + * 정지 성공 여부에 따라 상태 응답을 생성한다. + * + * @return 정지 결과에 대한 상태 응답 + * @author 함예정 + * @since 2025-05-11 + */ + @Override + public ArticleCollectorStatusResponse stop() { + boolean result = collectorPort.stop(); + return getCollectorStatusResponse(result); + } + + /** + * 실행 결과에 따라 응답 메시지를 구성한다. + * + * @param result CollectorPort 실행 결과 + * @return 상태 응답 객체 + * @author 함예정 + * @since 2025-05-11 + */ + private ArticleCollectorStatusResponse getCollectorStatusResponse(boolean result) { + return new ArticleCollectorStatusResponse(result); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/dto/ArticleCollectorStatusResponse.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/dto/ArticleCollectorStatusResponse.java new file mode 100644 index 0000000..7ff4aec --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/dto/ArticleCollectorStatusResponse.java @@ -0,0 +1,26 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.application.service.dto; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +/** + * 실행 상태를 클라이언트에 전달하기 위한 응답 DTO. + * + * @author 함예정 + * @since 2025-05-11 + */ +@Getter +public class ArticleCollectorStatusResponse { + private final boolean running; + private final String message; + + private final String failMessage = "처리에 실패했습니다"; + private final String successMessage = "요청이 성공적으로 전달 됐습니다"; + + + public ArticleCollectorStatusResponse(boolean running) { + this.running = running; + this.message = running ? 
successMessage : failMessage; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/ArticleCollectorException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/ArticleCollectorException.java new file mode 100644 index 0000000..95a228f --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/ArticleCollectorException.java @@ -0,0 +1,35 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.exception; + +import com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +/** + * 뉴스 기사 수집 관련 예외처리 클래스 + * + * @since 2025-05-10 + * @modified 2025-05-11 + * - 클래스에서 저자 삭제 (메소드에 저자 추가) + * - 코드 컨벤션에 맞도록 CustomException 상속으로 변경 (변경 전: RuntimeException 상속) + */ +public class ArticleCollectorException extends CustomException { + private final ErrorCode errorCode; + + public ArticleCollectorException(ErrorCode errorCode) { + super(errorCode); + this.errorCode = errorCode; + } + + public ArticleCollectorException(ErrorCode errorCode, Throwable cause) { + super(errorCode); + this.errorCode = errorCode; + } + + /** + * + * @return + */ + @Override + public ErrorCode getErrorCode() { + return errorCode; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/BatchJobExceptionTranslator.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/BatchJobExceptionTranslator.java new file mode 100644 index 0000000..0dd339d --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/BatchJobExceptionTranslator.java @@ -0,0 +1,37 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.exception; + +import java.util.Map; + +import org.springframework.batch.core.JobParametersInvalidException; +import 
org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; +import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; +import org.springframework.batch.core.repository.JobRestartException; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; + +/** + * Spring Batch 작업 실행 중 발생할 수 있는 예외를

+ * {@link ArticleCollectorErrorCode}로 변환하는 컴포넌트입니다. + */ +@Component +public class BatchJobExceptionTranslator { + private static final Map, ArticleCollectorErrorCode> CODE_MAP = + Map.of( + JobExecutionAlreadyRunningException.class, ArticleCollectorErrorCode.JOB_ALREADY_RUNNING, + JobRestartException.class, ArticleCollectorErrorCode.JOB_RESTART_FAIL, + JobInstanceAlreadyCompleteException.class, ArticleCollectorErrorCode.JOB_ALREADY_COMPLETE, + JobParametersInvalidException.class, ArticleCollectorErrorCode.INVALID_JOB_PARAMETER + ); + + /** + * 주어진 예외를 해당하는 {@link ArticleCollectorErrorCode}로 변환합니다. + * 정의되지 않은 예외 클래스의 경우 {@code UNKNOWN_ERROR}를 반환합니다. + * + * @param e 변환할 예외 객체 + * @return 매핑된 {@link ArticleCollectorErrorCode} + */ + public ArticleCollectorErrorCode translate(Exception e) { + return CODE_MAP.getOrDefault(e.getClass(), ArticleCollectorErrorCode.UNKNOWN_ERROR); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java new file mode 100644 index 0000000..6d18db4 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java @@ -0,0 +1,64 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.exception.error; + +import org.springframework.http.HttpStatus; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +/** + * 뉴스 기사 수집 관련 오류 코드를 정의하는 열거형 + * + * @since 2025-05-10 + * @modified 2025-05-11 + * - 클래스 주석에서 저자 삭제 + * - 예외 전역 처리를 위해 상속 구조로 변경 + * - 예외 전역 처리로 알 수 없는 오류 코드 삭제 + */ +@Getter +@RequiredArgsConstructor +public enum ArticleCollectorErrorCode implements ErrorCode { + + // 실행 오류 + JOB_ALREADY_RUNNING(HttpStatus.BAD_REQUEST, 150001, "[Quartz] Batch 실행 실패 - 현재 Job이 이미 실행 
중입니다."), + JOB_ALREADY_COMPLETE(HttpStatus.BAD_REQUEST, 150002, "[Quartz] Batch 실행 실패 - 동일한 JobParameters로 실행된 Job이 이미 완료되었습니다."), + UNKNOWN_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 550001, "[Quartz] Batch 실행 중 알 수 없는 예외 발생"), + JOB_RESTART_FAIL(HttpStatus.INTERNAL_SERVER_ERROR, 550002, "[Quartz] Batch 실행 실패 - Job을 재시작할 수 없습니다. 이전 실행 상태가 불안정하거나 종료되지 않았을 수 있습니다."), + INVALID_JOB_PARAMETER(HttpStatus.INTERNAL_SERVER_ERROR, 550003, "[Quartz] Batch 실행 실패 - JobParameters가 유효하지 않습니다. 필수 파라미터 누락 또는 형식 오류일 수 있습니다."), + SCHEDULER_START_FAIL(HttpStatus.INTERNAL_SERVER_ERROR, 550004, "[Quartz] 스케줄러 시작 실패"), + SCHEDULER_STOP_FAIL(HttpStatus.INTERNAL_SERVER_ERROR, 550005, "[Quartz] 스케줄러 중지 실패"), + STATUS_CHECK_FAIL(HttpStatus.INTERNAL_SERVER_ERROR, 550006, "상태 확인 실패"), + + // RSS 정보 로드 관련 오류 + FEED_CONNECTION_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450001,"RSS 피드 연결 중 오류가 발생했습니다."), + FEED_PARSING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450002,"RSS 피드 파싱 중 오류가 발생했습니다."), + FEED_TIMEOUT_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450003, "RSS 피드 로드 중 시간 초과가 발생했습니다."), + + // Mapper 관련 오류 + MAPPER_NOT_FOUND(HttpStatus.INTERNAL_SERVER_ERROR, 450004, "요청한 매퍼를 찾을 수 없습니다."), + ITEM_MAPPING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450005,"RSS 항목 매핑 중 오류가 발생했습니다."), + + // 데이터베이스 관련 오류 + DB_SAVE_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 530001, "RSS 뉴스를 저장하는 중 오류가 발생했습니다."), + DUPLICATE_LINK_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450006, "이미 존재하는 링크입니다."); + + private final HttpStatus status; + private final int code; + private final String message; + + @Override + public HttpStatus httpStatus() { + return status; + } + + @Override + public int codeNumber() { + return code; + } + + @Override + public String message() { + return message; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/adapter/CollectorAdapter.java 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/adapter/CollectorAdapter.java new file mode 100644 index 0000000..e894fb8 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/adapter/CollectorAdapter.java @@ -0,0 +1,103 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.adapter; + +import org.quartz.Scheduler; +import org.quartz.SchedulerException; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.application.port.out.CollectorPort; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; + +import lombok.RequiredArgsConstructor; + +/** + * Quartz Scheduler 를 제어하는 CollectorPort 구현체. + * 외부 요청에 따라 스케줄러를 시작하거나 정지하며, + * 현재 실행 중인지 상태를 확인할 수 있다. + * + * @since 2025-05-10 + */ +@Component +@RequiredArgsConstructor +public class CollectorAdapter implements CollectorPort { + private final Scheduler scheduler; + + /** + * Quartz 스케줄러를 시작하고, 정상적으로 시작되었는지 상태를 확인한다. + * + * @return 스케줄러가 실행 중이면 true, 그렇지 않으면 false + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public boolean start() { + return startScheduler(); + } + + /** + * Quartz 스케줄러를 standby 상태로 전환하여 정지한다.

+ * 이미 standby 상태인 경우에는 아무 작업도 하지 않는다. + * + * @return 정지 요청이 성공했으면 true + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public boolean stop() { + try { + if (!scheduler.isInStandbyMode()) { + scheduler.standby(); + } + return true; + } catch (SchedulerException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCHEDULER_STOP_FAIL, e); + } + } + + /** + * 스케줄러가 실행 상태 플래그 확인 메소드 + * + * @return 스케줄러가 실행 중이면 true, 그렇지 않으면 false + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public boolean isRunning() { + return checkSchedulerStatus(); + } + + /** + * Quartz 스케줄러를 세부 시작 메소드 + * 1. 실행 요청 + * 2. 실행 상태 플래그 반환 + * + * @return 스케줄러가 실행 중이면 true, 그렇지 않으면 false + * @throws ArticleCollectorException SchedulerException 발생으로 실행 실패 시 + * @author 함예정 + * @since 2025-05-11 + */ + private boolean startScheduler() { + try { + scheduler.start(); + return isRunning(); + } catch (SchedulerException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCHEDULER_START_FAIL, e); + } + } + + /** + * 스케줄러의 현재 상태를 확인합니다. 
+ * + * @return 스케줄러가 시작되었고 대기 모드가 아닌 경우 true, 그렇지 않으면 false + * @throws ArticleCollectorException 스케줄러 상태 확인 중 예외 발생 시 커스텀 예외로 래핑하여 던짐 + * @author 함예정 + * @since 2025-05-11 + */ + private boolean checkSchedulerStatus() { + try { + return scheduler.isStarted() && !scheduler.isInStandbyMode(); + } catch (SchedulerException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.STATUS_CHECK_FAIL, e); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/JobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/JobConfig.java new file mode 100644 index 0000000..3d45269 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/JobConfig.java @@ -0,0 +1,45 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch; + +import org.springframework.batch.core.Job; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.job.builder.JobBuilder; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +/** + * Spring Batch Job 설정 클래스.

+ * RSS 기사 수집을 위한 Batch Job 을 정의하며, 파티셔닝된 Step 을 시작 단계로 구성한다.

+ * + * 이 잡은 {@code articleCollectorBatchJob}이라는 이름으로 정의되며, + * {@link Step} 객체는 외부에서 주입받아 사용한다.

+ * + * 해당 Job 은 Quartz 또는 Spring Scheduler 를 통해 주기적으로 실행될 수 있다.

+ * + * @since 2025-05-10 + */ +@Configuration +public class JobConfig { + private final String jobName = "articleCollectorBatchJob"; + private final JobRepository jobRepository; + private final Step rssPartitionedStep; + + public JobConfig(JobRepository jobRepository, Step articleRssPartitionedStep) { + this.jobRepository = jobRepository; + this.rssPartitionedStep = articleRssPartitionedStep; + } + + /** + * RSS 기사 수집용 Spring Batch Job Bean을 생성한다. + * 파티셔닝 Step 을 실행하도록 구성한다. + * + * @return RSS 기사 수집 배치 Job + * @author 함예정 + * @since 2025-05-10 + */ + @Bean + public Job articleCollectJob() { + return new JobBuilder(jobName, jobRepository) + .start(rssPartitionedStep) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/NewsCategory.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/NewsCategory.java new file mode 100644 index 0000000..6e33563 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/NewsCategory.java @@ -0,0 +1,27 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch; + +/** + * 뉴스 기사 표준 카테고리 + * + * @author 양병학 + * @since 2025-05-12 + */ +public enum NewsCategory { + POLITICS("정치"), + ECONOMY("경제"), + SOCIETY("사회"), + INTERNATIONAL("국제"), + ENTERTAINMENT("연예"), + SPORTS("스포츠"), + TOTAL("전체"); + + private final String displayName; + + NewsCategory(String displayName) { + this.displayName = displayName; + } + + public String getDisplayName() { + return displayName; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java new file mode 100644 index 
0000000..a82e1d5 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java @@ -0,0 +1,117 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch; + +import lombok.Getter; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** + * RSS 뉴스 소스와 URL을 정의하는 열거형 + * 각 항목은 언론사, 카테고리, URL 정보를 포함 + * + * @author 양병학 + * @since 2025-05-10 + * @modified 2025-05-12 표준 카테고리(NewsCategory) 도입 및 동아일보, 경향신문 카테고리별 피드 추가 + */ +@Getter +public enum RssSource { + // 국민일보 RSS 피드 + KMIB_POLITICS("국민일보", NewsCategory.POLITICS, "https://www.kmib.co.kr/rss/data/kmibPolRss.xml", "km", true), + KMIB_ECONOMY("국민일보", NewsCategory.ECONOMY, "https://www.kmib.co.kr/rss/data/kmibEcoRss.xml", "km", true), + KMIB_SOCIETY("국민일보", NewsCategory.SOCIETY, "https://www.kmib.co.kr/rss/data/kmibSocRss.xml", "km", true), + KMIB_INTERNATIONAL("국민일보", NewsCategory.INTERNATIONAL, "https://www.kmib.co.kr/rss/data/kmibIntRss.xml", "km", true), + KMIB_ENTERTAINMENT("국민일보", NewsCategory.ENTERTAINMENT, "https://www.kmib.co.kr/rss/data/kmibEntRss.xml", "km", true), + KMIB_SPORTS("국민일보", NewsCategory.SPORTS, "https://www.kmib.co.kr/rss/data/kmibSpoRss.xml", "km", true), + + // 동아일보 RSS 피드 + DONGA_POLITICS("동아일보", NewsCategory.POLITICS, "https://rss.donga.com/politics.xml", "da", true), + DONGA_ECONOMY("동아일보", NewsCategory.ECONOMY, "https://rss.donga.com/economy.xml", "da", true), + DONGA_SOCIETY("동아일보", NewsCategory.SOCIETY, "https://rss.donga.com/national.xml", "da", true), + DONGA_INTERNATIONAL("동아일보", NewsCategory.INTERNATIONAL, "https://rss.donga.com/international.xml", "da", true), + DONGA_ENTERTAINMENT("동아일보", NewsCategory.ENTERTAINMENT, "https://rss.donga.com/entertainment.xml", "da", true), + DONGA_SPORTS("동아일보", NewsCategory.SPORTS, "https://rss.donga.com/sports.xml", "da", true), + + // 경향신문 RSS 피드 + KHAN_POLITICS("경향신문", 
NewsCategory.POLITICS, "https://www.khan.co.kr/rss/rssdata/politic_news.xml", "kh", true), + KHAN_ECONOMY("경향신문", NewsCategory.ECONOMY, "https://www.khan.co.kr/rss/rssdata/economy_news.xml", "kh", true), + KHAN_SOCIETY("경향신문", NewsCategory.SOCIETY, "https://www.khan.co.kr/rss/rssdata/society_news.xml", "kh", true), + KHAN_INTERNATIONAL("경향신문", NewsCategory.INTERNATIONAL, "https://www.khan.co.kr/rss/rssdata/world_news.xml", "kh", true), + KHAN_ENTERTAINMENT("경향신문", NewsCategory.ENTERTAINMENT, "https://www.khan.co.kr/rss/rssdata/art_news.xml", "kh", true), + KHAN_SPORTS("경향신문", NewsCategory.SPORTS, "https://www.khan.co.kr/rss/rssdata/sports_news.xml", "kh", true); + + private final String publisherName; + private final NewsCategory category; + private final String url; + private final String mapperType; + private final boolean enabled; + + RssSource(String publisherName, NewsCategory category, String url, String mapperType, boolean enabled) { + this.publisherName = publisherName; + this.category = category; + this.url = url; + this.mapperType = mapperType; + this.enabled = enabled; + } + + /** + * 카테고리 이름 반환 + * + * @return 카테고리 표시 이름 + */ + public String getCategoryName() { + return category.getDisplayName(); + } + + /** + * 언론사 이름과 카테고리를 결합한 표시 이름 반환 + */ + public String getDisplayName() { + return publisherName + "-" + getCategoryName(); + } + + /** + * 매퍼 타입 키 반환 + */ + public String getMapperType() { + return mapperType; + } + + /** + * 언론사 코드 접두사 반환 (대문자) + */ + public String getCodePrefix() { + return mapperType.toUpperCase(); + } + + /** + * 활성화된 모든 소스 반환 + */ + public static List getEnabledSources() { + return Arrays.stream(values()) + .filter(RssSource::isEnabled) + .collect(Collectors.toList()); + } + + /** + * 특정 언론사의 모든 소스 반환 + */ + public static List getSourcesByPublisher(String publisherName) { + return Arrays.stream(values()) + .filter(source -> source.getPublisherName().equals(publisherName)) + .collect(Collectors.toList()); + } + + /** + * 특정 카테고리의 
모든 소스 반환 + * + * @param category 검색할 카테고리 + * @return 해당 카테고리의 활성화된 소스 목록 + */ + public static List getSourcesByCategory(NewsCategory category) { + return Arrays.stream(values()) + .filter(RssSource::isEnabled) + .filter(source -> source.getCategory() == category) + .collect(Collectors.toList()); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/StepConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/StepConfig.java new file mode 100644 index 0000000..03143a1 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/StepConfig.java @@ -0,0 +1,104 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch; + +import java.util.List; + +import org.springframework.batch.core.Step; +import org.springframework.batch.core.partition.support.Partitioner; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.batch.core.step.builder.StepBuilder; +import org.springframework.batch.item.ItemProcessor; +import org.springframework.batch.item.ItemReader; +import org.springframework.batch.item.ItemWriter; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.task.TaskExecutor; +import org.springframework.transaction.PlatformTransactionManager; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +/** + * Spring Batch Step 구성 클래스. + * RSS 기사 수집을 위한 병렬 처리용 파티셔닝 Step과 + * 실제 처리 로직이 포함된 청크 기반 Step을 설정한다. 
+ * + * - articleRssPartitionedStep: 소스 데이터를 파티셔닝하여 병렬로 처리 + * - parseRssStep: 각 파티션 단위에서 RSS 데이터를 읽고, 가공 후 저장 + * + * @since 2025-05-10 + */ +@Configuration +public class StepConfig { + private final String executorName = "normalExecutor"; + private static final String partitionedStepName = "articleRssPartitionedStep"; + private final String parseRssStepName = "parseRssStep"; + private final int gridSize = 3; + private final int chunkSize = 10; + private final int retryLimit = 3; + private final int skipLimit = 100; + private final JobRepository jobRepository; + private final Partitioner rssSourcePartitioner; + private final PlatformTransactionManager transactionManager; + private final TaskExecutor taskExecutor; + private final ItemProcessor> processor; + private final ItemWriter> writer; + + public StepConfig(JobRepository jobRepository, + Partitioner rssSourcePartitioner, + PlatformTransactionManager platformTransactionManager, + @Qualifier(executorName) + TaskExecutor taskExecutor, + ItemProcessor> processor, + ItemWriter> writer) { + this.jobRepository = jobRepository; + this.rssSourcePartitioner = rssSourcePartitioner; + this.transactionManager = platformTransactionManager; + this.taskExecutor = taskExecutor; + this.processor = processor; + this.writer = writer; + } + + /** + * RSS 소스 데이터를 파티셔닝하여 병렬로 처리하는 Step을 정의한다. + * 내부적으로 {@code parseRssStep}을 병렬 실행하며, TaskExecutor를 통해 스레드 분산 처리한다. + * + * @param parseRssStep 파티션마다 실행될 실제 처리 Step + * @return 파티셔닝 기반 Step + * @author 함예정 + * @since 2025-05-10 + */ + @Bean + public Step articleRssPartitionedStep(Step parseRssStep) { + return new StepBuilder(partitionedStepName, jobRepository) + .partitioner(parseRssStep.getName(), rssSourcePartitioner) + .step(parseRssStep) + .taskExecutor(taskExecutor) + .gridSize(gridSize) + .build(); + } + + /** + * RSS 데이터를 청크 단위로 읽고, 가공하고, 저장하는 Step을 정의한다. + * 예외 발생 시 지정된 예외 클래스는 skip 처리되며, {@code skipLimit} 이하까지 허용된다. 
+ * + * @param articleReader RSS 데이터 소스를 읽는 Reader + * @return RSS 처리용 Step + * @author 함예정 + * @since 2025-05-10 + */ + @Bean + public Step parseRssStep(ItemReader articleReader) { + return new StepBuilder(parseRssStepName, jobRepository) + .>chunk(chunkSize, transactionManager) + .reader(articleReader) + .processor(processor) + .writer(writer) + .faultTolerant() + .retry(ArticleCollectorException.class) + .retryLimit(retryLimit) + .skip(ArticleCollectorException.class) + .skipLimit(skipLimit) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java new file mode 100644 index 0000000..c226437 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java @@ -0,0 +1,73 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.executor; + +import org.quartz.DisallowConcurrentExecution; +import org.quartz.JobExecutionContext; +import org.springframework.batch.core.Job; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.JobParametersInvalidException; +import org.springframework.batch.core.launch.JobLauncher; +import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; +import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; +import org.springframework.batch.core.repository.JobRestartException; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.BatchJobExceptionTranslator; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; 
+import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; + +import lombok.RequiredArgsConstructor; + +/** + * Quartz에 의해 트리거되는 Spring Batch Job 실행 클래스. + * JobLauncher를 통해 {@code rssJob}을 수동 실행하며, 매 실행 시 고유한 JobParameters를 생성하여 중복 실행을 방지한다. + * + * - @DisallowConcurrentExecution: 이전 실행이 끝나기 전에는 새로운 실행이 중첩되지 않도록 제한 + * - JobParameters에 timestamp를 포함시켜 매번 다른 인스턴스로 실행되도록 설정 + * + * 이 클래스는 단순한 실행자 역할만 하며, 실제 배치 로직은 {@code rssJob} 내부에 정의되어 있다. + * + * @since 2025-05-10 + */ +@Component +@DisallowConcurrentExecution +@RequiredArgsConstructor +public class BatchJobExecutor implements org.quartz.Job { + private final JobLauncher jobLauncher; + private final Job articleCollectorBatchJob; + private final BatchJobExceptionTranslator batchJobExceptionTranslator; + + /** + * Quartz 트리거에 의해 호출되는 메서드. + * 내부적으로 Spring Batch Job을 실행하는 로직을 위임한다. + * + * @param jobExecutionContext Quartz 실행 컨텍스트 + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public void execute(JobExecutionContext jobExecutionContext) { + startSpringBatchJob(); + } + + /** + * Spring Batch Job을 JobLauncher를 통해 실행한다. + * 각 실행마다 timestamp 파라미터를 부여하여 중복 실행 방지. + * 예외 발생 시 {@link ArticleCollectorException}으로 변환하여 처리한다. 
+ * + * @author 함예정 + * @since 2025-05-10 + */ + private void startSpringBatchJob() { + JobParameters params = new JobParametersBuilder() + .addLong("timestamp", System.currentTimeMillis()) + .toJobParameters(); + + try { + jobLauncher.run(articleCollectorBatchJob, params); + } catch (Exception e) { + ArticleCollectorErrorCode exceptionCode = batchJobExceptionTranslator.translate(e); + throw new ArticleCollectorException(exceptionCode); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/TaskExecutorConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/TaskExecutorConfig.java new file mode 100644 index 0000000..a5fc332 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/TaskExecutorConfig.java @@ -0,0 +1,49 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.executor; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.task.TaskExecutor; +import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; + +import com.likelion.backendplus4.talkpick.batch.common.decorator.logging.MdcTaskDecorator; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +/** + * 비동기 작업 처리를 위한 TaskExecutor 설정 클래스. + * Spring Batch 또는 기타 병렬 처리를 위한 ThreadPool 기반 Executor를 정의한다. 
+ * + * - {@link MdcTaskDecorator}를 통해 각 스레드에 MDC 로그 컨텍스트를 유지 + * + * @since 2025-05-02 + * @modified 2025-05-10 + */ +@Configuration +@RequiredArgsConstructor +public class TaskExecutorConfig { + private final MdcTaskDecorator mdcTaskDecorator; + + @Getter + private static final String normalExecutorName = "normalExecutor"; + + /** + * 일반적인 작업처리를 위한 ThreadPool 기반 TaskExecutor 설정 + * + * @return TaskExecutor 인스턴스 + * @author 함예정 + * @since 2025-05-02 + * @modified 2025-05-10 + */ + @Bean(normalExecutorName) + public TaskExecutor taskExecutor() { + ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); + executor.setCorePoolSize(5); + executor.setMaxPoolSize(10); + executor.setQueueCapacity(10); + executor.setTaskDecorator(mdcTaskDecorator); + executor.setThreadNamePrefix("normalExecutor-"); + executor.initialize(); + return executor; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzJobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzJobConfig.java new file mode 100644 index 0000000..8cf1f4b --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzJobConfig.java @@ -0,0 +1,47 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.quartz; + +import org.quartz.JobBuilder; +import org.quartz.JobDetail; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.executor.BatchJobExecutor; + +/** + * RSS 피드를 정기적으로 수집하는 스케줄 작업 + * application.yml의 rss.scheduler.cron 속성으로 실행 주기 설정 + * 설정이 없을 경우 기본값으로 1분마다 실행 + * + * 1. 서비스를 거쳐서 피드 수집 + * 2. 처리된 항목 수 로깅 + * 3. 
예외 발생 시 오류 로깅하고 다음 스케쥴까지 대기 + * + * @modified 2025-05-11 Spring Scheduler 에서 Quartz Scheduler 으로 전환 + * @modified 2025-05-10 cron 표현식을 application 으로 분리 + * @since 2025-05-10 최초 작성 + * + */ +@Configuration +public class QuartzJobConfig { + private final String articleCollectorJobDetailName = "articleCollectorJobDetail"; + + + /** + * RSS 수집 Quartz JobDetail 빈 등록. + * Job 클래스는 {@link BatchJobExecutor}이며 다음과 같은 설정을 포함한다: + * - withIdentity("rssBatchJob"): Scheduler 내에서 이 Job을 고유하게 식별하기 위한 이름 지정 + * - storeDurably(): Trigger가 없더라도 Scheduler에 등록된 상태로 유지되도록 설정 + * + * @return RSS 배치 작업용 JobDetail 객체 + * @author 함예정 + * @since 2025-05-10 + */ + @Bean + public JobDetail articleCollectorJobDetail() { + return JobBuilder.newJob(BatchJobExecutor.class) + .withIdentity(articleCollectorJobDetailName) + .storeDurably() + .build(); + } + +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java new file mode 100644 index 0000000..b59a4cc --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java @@ -0,0 +1,49 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.quartz; + +import org.quartz.CronScheduleBuilder; +import org.quartz.JobDetail; +import org.quartz.Trigger; +import org.quartz.TriggerBuilder; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class QuartzTriggerConfig { + private final String cronExpression; + private final JobDetail articleCollectorJobDetail; + private final String articleCollectorJobDetailName = "articleCollectorJobDetail"; + + /** + * 생성자 주입을 통해 
Cron 표현식을 설정한다. + * + * @param cronExpression RSS 배치 실행 주기를 정의하는 Cron 표현식 + * application.yml에서 article-collector.quartz.cron 값을 로드 합니다. + * @author 함예정 + * @since 2025-05-10 + */ + public QuartzTriggerConfig(@Value("${article-collector.quartz.cron}") String cronExpression, + JobDetail articleCollectorJobDetail) { + this.cronExpression = cronExpression; + this.articleCollectorJobDetail = articleCollectorJobDetail; + } + + /** + * RSS 수집 Quartz Trigger 빈 등록. + * - forJob: 이 Trigger 가 어떤 Quartz Job 과 연관되어 실행될지를 지정 + * - withIdentity: Scheduler 내에서 이 Trigger 를 고유하게 식별하기 위한 이름 지정 + * - withSchedule: Cron 표현식을 사용하여 실행 주기 설정 + * + * @return RSS 배치 작업용 Trigger 객체 + * @author 함예정 + * @since 2025-05-10 + */ + @Bean + public Trigger rssBatchTrigger() { + return TriggerBuilder.newTrigger() + .forJob(articleCollectorJobDetail) + .withIdentity(articleCollectorJobDetailName) + .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression)) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java new file mode 100644 index 0000000..c51130a --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java @@ -0,0 +1,101 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.processor; + +import java.util.ArrayList; +import java.util.List; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.batch.item.ItemProcessor; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.factory.RssMappingFactory; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import com.rometools.rome.feed.synd.SyndEntry; + +/** + * RSS 소스를 기반으로 기사 목록을 생성하는 Spring Batch ItemProcessor 구현체. + * 주어진 {@link RssSource}의 URL로부터 SyndEntry 목록을 파싱하고, + * 매핑 전략에 따라 {@link ArticleEntity} 리스트로 변환한다. + * + * - RSS 파싱: {@link RssFeedReader}를 통해 피드를 읽어옴 + * - 데이터 매핑: {@link RssMappingFactory}에서 소스 타입에 따라 매퍼 선택 + * + * 이 클래스는 Step 실행 시에만 생성되며, StepScope에 따라 각 파티션마다 독립적으로 주입된다. + * + * @since 2025-05-10 + * @modified 2025-05-13 RssMapper to AbstractRssMapper로 변경 + */ +@Component +@StepScope +public class RssEntryProcessor implements ItemProcessor> { + + private final RssFeedReader rssFeedReader; + private final RssMappingFactory mappingFactory; + + public RssEntryProcessor(RssFeedReader rssFeedReader, RssMappingFactory mappingFactory) { + this.rssFeedReader = rssFeedReader; + this.mappingFactory = mappingFactory; + } + + /** + * 단일 RSS 소스를 받아 파싱 후 기사 리스트로 변환한다. + * - RSS 파싱 + * - 매핑 전략 선택 + * - 기사 리스트 생성 + * + * @param source RSS 피드 소스 정보 + * @return 해당 소스에서 추출된 기사 엔티티 리스트 + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public List process(RssSource source) { + List rssParseResult = parseRss(source); + AbstractRssMapper mapper = getMapper(source); + return buildArticleEntityList(source, rssParseResult, mapper); + } + + /** + * RSS 소스의 URL을 기반으로 피드를 파싱하여 SyndEntry 리스트를 반환한다. + * + * @param source RSS 피드 소스 + * @return 파싱된 RSS 엔트리 리스트 + * @since 2025-05-10 + * @author 함예정 + */ + private List parseRss(RssSource source) { + return rssFeedReader.getFeed(source.getUrl()); + } + + /** + * RSS 소스의 매퍼 타입에 따라 적절한 매퍼를 반환한다. 
+ * + * @param source 매핑 전략이 포함된 RSS 소스 + * @return 매퍼 인스턴스 + * @since 2025-05-10 + * @author 함예정 + * @modified 2025-05-13 AbstractRssMapper 타입으로 변경 + */ + private AbstractRssMapper getMapper(RssSource source) { + return mappingFactory.getMapper(source.getMapperType()); + } + + /** + * SyndEntry RSS 결과를 기반으로 ArticleEntity 리스트를 생성한다. + * + * @param source RSS 소스 정보 + * @param rssParseResult RSS 피드에서 파싱된 엔트리 리스트 + * @param mapper 소스에 맞는 RSS 매퍼 + * @return 변환된 ArticleEntity 리스트 + * @since 2025-05-10 + * @author 함예정 + * @modified 2025-05-13 AbstractRssMapper 타입으로 변경 + */ + private List buildArticleEntityList(RssSource source, List rssParseResult, + AbstractRssMapper mapper) { + return rssParseResult.stream() + .map(entry -> mapper.mapToRssNews(entry, source)) + .toList(); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java new file mode 100644 index 0000000..045ac1c --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java @@ -0,0 +1,96 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.processor; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.util.List; + +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.rometools.rome.feed.synd.SyndEntry; +import com.rometools.rome.feed.synd.SyndFeed; +import com.rometools.rome.io.SyndFeedInput; +import com.rometools.rome.io.XmlReader; + +/** + * RSS 피드 URL을 통해 XML 피드를 읽고 
파싱하여 {@link SyndEntry} 목록으로 반환하는 Reader 클래스. + * Rome 라이브러리를 이용하여 RSS를 파싱하며, 유효하지 않은 URL 또는 파싱 오류에 대해 예외를 처리한다. + * + * @since 2025-05-10 + */ +@Component +public class RssFeedReader { + + /** + * 주어진 피드 URL로부터 RSS 피드를 파싱하고, {@link SyndEntry} 리스트를 반환한다. + * + * @param feedUrl RSS 피드의 URL 문자열 + * @return 파싱된 SyndEntry 목록 + * @since 2025-05-10 + * @author 함예정 + */ + public List getFeed(String feedUrl) { + URL url = getURL(feedUrl); + URLConnection connection = openConnectionWithTimeout(url); + return parseRssEntries(connection); + } + + /** + * 문자열 형태의 URL을 {@link URL} 객체로 변환한다. + * + * @param feedUrl 문자열 형태의 URL + * @return URL 객체 + * @throws RuntimeException 유효하지 않은 URL 형식일 경우 + * @since 2025-05-10 + * @author 함예정 + */ + private URL getURL(String feedUrl) { + try { + return new URL(feedUrl); + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + } + + /** + * 지정된 URL에 대해 연결 타임아웃과 읽기 타임아웃을 설정한 후 URLConnection을 반환합니다. + * + * @param url 연결할 URL 객체 + * @return 설정된 타임아웃을 가진 URLConnection 객체 + * @throws RuntimeException 연결 중 IOException이 발생할 경우 런타임 예외로 래핑하여 던짐 + * @author 함예정 + * @since 2025-05-12 + */ + private URLConnection openConnectionWithTimeout(URL url) { + try { + URLConnection connection = url.openConnection(); + connection.setConnectTimeout(3000); + connection.setReadTimeout(5000); + return connection; + } catch (IOException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_CONNECTION_ERROR, e); + } + } + + /** + * 주어진 URLConnection으로부터 RSS 피드를 읽어 SyndEntry 목록으로 파싱합니다. 
+ * + * @param connection RSS 피드를 제공하는 URLConnection 객체 + * @return 파싱된 SyndEntry 객체 리스트 + * @throws ArticleCollectorException RSS 피드 파싱 중 오류가 발생한 경우 사용자 정의 예외로 래핑하여 던짐 + * @author 함예정 + * @since 2025-05-12 + */ + private List parseRssEntries(URLConnection connection) { + try (XmlReader reader = new XmlReader(connection)) { + SyndFeedInput input = new SyndFeedInput(); + SyndFeed syndFeed = input.build(reader); + return syndFeed.getEntries(); + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/ArticleReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/ArticleReader.java new file mode 100644 index 0000000..e6f1e20 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/ArticleReader.java @@ -0,0 +1,51 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.reader; + +import java.util.Iterator; +import java.util.List; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.batch.item.ItemReader; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; + +import lombok.extern.slf4j.Slf4j; + +/** + * StepExecutionContext로부터 전달받은 RSS 소스 리스트를 순차적으로 제공하는 ItemReader 구현체. + * Spring Batch의 Step 내부에서 Partition 단위로 각 소스를 하나씩 읽어 처리하는 데 사용된다. + * + * @since 2025-05-10 + */ +@Slf4j +@Component +@StepScope +public class ArticleReader implements ItemReader { + + private final Iterator iterator; + + /** + * StepExecutionContext에 저장된 RSS 소스 리스트를 기반으로 Iterator를 초기화한다. 
+ * + * @param sources StepExecutionContext로부터 전달된 RSS 소스 리스트 + * @since 2025-05-10 + * @author 함예정 + */ + public ArticleReader(@Value("#{stepExecutionContext['sourceList']}") List sources) { + this.iterator = sources.iterator(); + } + + /** + * RSS 소스를 하나씩 반환하며, 더 이상 남은 소스가 없으면 null을 반환한다. + * null 반환 시 해당 Step의 실행이 종료된다. + * + * @return 하나의 RssSource 또는 null + * @since 2025-05-10 + * @author 함예정 + */ + @Override + public RssSource read() { + return iterator.hasNext() ? iterator.next() : null; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java new file mode 100644 index 0000000..0281f11 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java @@ -0,0 +1,131 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.reader; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.springframework.batch.core.partition.support.Partitioner; +import org.springframework.batch.item.ExecutionContext; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; + +import lombok.extern.slf4j.Slf4j; + +/** + * 활성화된 RSS 소스를 파티션 단위로 분할하여 StepExecutionContext에 전달하는 Partitioner 구현체. + * Spring Batch에서 멀티 스레드/병렬 실행을 위해 사용된다. + * + * 각 파티션은 sourceList를 포함한 ExecutionContext로 구성된다. + * + * @since 2025-05-10 + */ +@Slf4j +@Component +public class RssSourcePartitioner implements Partitioner { + + /** + * 전체 RSS 소스를 파티셔닝하여 각 파티션별 ExecutionContext를 생성한다. 
+ * + * @param gridSize 실행할 파티션 수 + * @return 파티션 이름과 ExecutionContext의 매핑 정보 + * @since 2025-05-10 + * @author 함예정 + */ + @Override + public Map partition(int gridSize) { + List sources = RssSource.getEnabledSources(); + int chunkSize = calculateChunkSize(sources.size(), gridSize); + return buildPartitions(sources, chunkSize); + } + + /** + * 총 소스 수와 파티션 수를 기반으로 파티션당 소스 개수를 계산한다. + * + * @param totalSources 전체 RSS 소스 수 + * @param gridSize 파티션 수 + * @return 파티션당 소스 개수 + * @since 2025-05-10 + * @author 함예정 + */ + private int calculateChunkSize(int totalSources, int gridSize) { + int chunkSize = (int)Math.ceil((double)totalSources / gridSize); + log.info("Calculated chunkSize: {}", chunkSize); + return chunkSize; + } + + /** + * RSS 소스를 주어진 chunkSize로 나눠 각 파티션별 ExecutionContext를 생성한다. + * + * @param sources RSS 소스 리스트 + * @param chunkSize 파티션당 소스 개수 + * @return 파티션 맵 + * @since 2025-05-10 + * @author 함예정 + */ + private Map buildPartitions(List sources, int chunkSize) { + Map partitions = new HashMap<>(); + int totalPartitions = calculateTotalPartitions(sources, chunkSize); + + for (int i = 0; i < totalPartitions; i++) { + int from = i * chunkSize; + int to = calculateChunkEndIndex(sources, chunkSize, from); + + if (from >= to) { + break; + } + + ExecutionContext context = buildExecutionContext(sources, from, to); + partitions.put("partition" + i, context); + } + + return partitions; + } + + /** + * 주어진 RSS 소스 리스트를 청크 크기(chunkSize)로 분할할 때 필요한 총 파티션 수를 계산합니다. + * + * @param sources RSS 소스 목록 + * @param chunkSize 하나의 파티션에 포함될 RSS 소스 수 + * @return 전체 파티션 수 + * @author 함예정 + * @since 2025-05-12 + */ + private int calculateTotalPartitions(List sources, int chunkSize) { + return (sources.size() + chunkSize - 1) / chunkSize; + } + + /** + * 주어진 시작 인덱스(from)와 청크 크기(chunkSize)를 기반으로, + * 리스트의 범위를 초과하지 않도록 제한된 끝 인덱스를 계산합니다. 
+ * + * @param sources RSS 소스 리스트 + * @param chunkSize 하나의 파티션에 포함될 RSS 소스 수 + * @param from 시작 인덱스 + * @return 리스트 범위를 초과하지 않는 끝 인덱스 + * @author 함예정 + * @since 2025-05-12 + */ + private int calculateChunkEndIndex(List sources, int chunkSize, int from) { + return Math.min(from + chunkSize, sources.size()); + } + + /** + * 지정된 인덱스 범위에 해당하는 RSS 소스 부분 리스트로 ExecutionContext를 생성한다. + * 생성된 context는 Spring Batch 파티션 실행 시 각 Step에 전달된다. + * + * @param sources 전체 RSS 소스 리스트 + * @param from 시작 인덱스 (포함) + * @param to 종료 인덱스 (미포함) + * @return 파티션별 RSS 소스가 포함된 ExecutionContext + * @since 2025-05-10 + * @author 함예정 + */ + private ExecutionContext buildExecutionContext(List sources, int from, int to) { + List subList = new ArrayList<>(sources.subList(from, to)); + ExecutionContext context = new ExecutionContext(); + context.put("sourceList", subList); + return context; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java new file mode 100644 index 0000000..b021c69 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java @@ -0,0 +1,140 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import com.rometools.rome.feed.synd.SyndContent; +import com.rometools.rome.feed.synd.SyndEntry; + +import 
java.time.LocalDateTime; +import java.time.ZoneId; +import java.util.Date; +import java.util.Optional; + +/** + * RSS를 ArticleEntity로 변환하는 추상 클래스 + * 공통 변환 로직을 제공 + * + * @author 양병학 + * @since 2025-05-13 최초 작성 + */ +public abstract class AbstractRssMapper { + + /** + * RSS 피드를 ArticleEntity 엔티티로 변환 + * + * @param entry 변환할 SyndEntry(Rss 데이터) 객체 + * @param source RSS 소스 정보 + * @return 변환된 ArticleEntity 엔티티 + */ + public ArticleEntity mapToRssNews(SyndEntry entry, RssSource source) { + String title = extractTitle(entry); + String link = extractLink(entry); + LocalDateTime pubDate = extractPubDate(entry); + String guid = extractGuid(entry, source); + String description = extractDescription(entry); + String category = extractCategory(entry, source); + + return buildArticleEntity(title, link, pubDate, guid, description, category); + } + + /** + * 매퍼의 유형을 식별하는 코드 반환 + * 소문자 언론사 코드 형태 (예: "km", "da", "kh") + */ + public abstract String getMapperType(); + + /** + * Date 객체를 LocalDateTime으로 변환 + * + * @param date 변환할 Date 객체 + * @return 변환된 LocalDateTime 객체, date가 null이면 현재 시간 반환 + */ + protected LocalDateTime convertToLocalDateTime(Date date) { + return date != null + ? date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime() + : LocalDateTime.now(); + } + + /** + * 제목 추출 메서드 + */ + protected String extractTitle(SyndEntry entry) { + return entry.getTitle(); + } + + /** + * 링크 추출 메서드 + * + * @param entry RSS 항목 + * @return 링크 + */ + protected String extractLink(SyndEntry entry) { + return entry.getLink(); + } + + /** + * 발행일 추출 메서드 + * + * @param entry RSS 항목 + * @return 발행일 + */ + protected LocalDateTime extractPubDate(SyndEntry entry) { + return convertToLocalDateTime(entry.getPublishedDate()); + } + + /** + * 설명 추출 메서드 + * + * @param entry RSS 항목 + * @return 추출된 설명 + */ + protected String extractDescription(SyndEntry entry) { + return entry.getDescription() != null ? 
entry.getDescription().getValue() : null; + } + + /** + * 카테고리 추출 메서드 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @return 카테고리 + */ + protected String extractCategory(SyndEntry entry, RssSource source) { + return source.getCategoryName(); + } + + /** + * GUID 추출 메서드 - 하위 클래스에서 구현해야 함 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @return GUID + */ + protected abstract String extractGuid(SyndEntry entry, RssSource source); + + /** + * ArticleEntity 객체 생성 + * + * @param title 제목 + * @param link 링크 + * @param pubDate 발행일 + * @param guid GUID + * @param description 설명 + * @param category 카테고리 + * @return 생성된 ArticleEntity + */ + private ArticleEntity buildArticleEntity(String title, String link, LocalDateTime pubDate, + String guid, String description, String category) { + return ArticleEntity.builder() + .title(title) + .link(link) + .pubDate(pubDate) + .category(category) + .guid(guid) + .description(description) + .isSummary(false) + .build(); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/factory/RssMappingFactory.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/factory/RssMappingFactory.java new file mode 100644 index 0000000..54f2e82 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/factory/RssMappingFactory.java @@ -0,0 +1,50 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.factory; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; + +/** + * RSS 매핑 전략을 관리하는 팩토리 클래스 + * 
+ * @since 2025-05-10 + * @modified 2025-05-13 RssMapper 인터페이스 대신 AbstractRssMapper 사용 + */ +@Component +public class RssMappingFactory { + + private final Map mappers = new HashMap<>(); + + /** + * AbstractRssMapper 구현체를 받아서 Mapper에 등록 + * + * @param availableMappers AbstractRssMapper List 목록 + */ + @Autowired + public RssMappingFactory(List availableMappers) { + for (AbstractRssMapper mapper : availableMappers) { + String mapperType = mapper.getMapperType(); + mappers.put(mapperType, mapper); + } + } + + /** + * 타입에 맞게 mapper 반환 + * + * @param type 매퍼 타입 (소문자 언론사 코드) + * @return 해당 타입의 RSS 매퍼 + * @throws IllegalArgumentException 지원하지 않는 타입인 경우 + */ + public AbstractRssMapper getMapper(String type) { + AbstractRssMapper mapper = mappers.get(type); + if (mapper == null) { + throw new IllegalArgumentException("Mapper 없음: " + type); + } + return mapper; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java new file mode 100644 index 0000000..f34f2bd --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java @@ -0,0 +1,81 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import 
com.rometools.rome.feed.synd.SyndCategory; +import com.rometools.rome.feed.synd.SyndEntry; + +import org.springframework.stereotype.Component; + +import java.util.stream.Collectors; + +/** + * 동아일보 RSS 매퍼 구현체 + * + * @author 양병학 + * @since 2025-05-10 최초 작성 + * @modified 2025-05-13 AbstractRssMapper 상속 구조로 변경 및 활성화 + */ +@Component +public class DongaRssMapper extends AbstractRssMapper { + + /** + * 매퍼 타입 반환 + * + * @return 매퍼 타입 (da) + */ + @Override + public String getMapperType() { + return "da"; + } + + /** + * GUID 추출, URI를 GUID로 사용 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @return URI 또는 생성된 고유 ID + */ + @Override + protected String extractGuid(SyndEntry entry, RssSource source) { + String uniqueId = extractUniqueIdFromLink(entry.getLink()); + return source.getCodePrefix() + uniqueId; + } + + /** + * 동아일보 링크에서 고유 ID 추출 + * + * @param link 기사 링크 + * @return 추출된 고유 ID + */ + private String extractUniqueIdFromLink(String link) { + if (link == null) { + return String.valueOf(System.currentTimeMillis()); + } + + try { + String[] parts = link.split("/"); + if (parts.length >= 2) { + return parts[parts.length - 2]; + } + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); + } + + return String.valueOf(System.currentTimeMillis()); + } + + /** + * 카테고리 enum 정보 추출 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @return 결합된 카테고리 문자열 + */ + @Override + protected String extractCategory(SyndEntry entry, RssSource source) { + return source.getCategoryName(); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java new file mode 100644 index 0000000..c90d439 --- /dev/null +++ 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java @@ -0,0 +1,128 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import com.rometools.rome.feed.synd.SyndCategory; +import com.rometools.rome.feed.synd.SyndEntry; + +import org.springframework.stereotype.Component; + +import java.time.LocalDateTime; +import java.util.stream.Collectors; + +/** + * 경향신문 RSS 매퍼 구현체 + * + * @author 양병학 + * @since 2025-05-10 최초 작성 + * @modified 2025-05-13 AbstractRssMapper 상속 구조로 변경 및 활성화 + */ +@Component +public class KhanRssMapper extends AbstractRssMapper { + + /** + * 매퍼 타입 반환 + * + * @return 매퍼 타입 (kh) + */ + @Override + public String getMapperType() { + return "kh"; + } + + /** + * GUID 추출, 링크에서 기사 ID를 추출하여 사용 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @return 신문사 코드 + 기사 ID 형태의 GUID + */ + @Override + protected String extractGuid(SyndEntry entry, RssSource source) { + String uniqueId = extractUniqueIdFromLink(entry.getLink()); + return source.getCodePrefix() + uniqueId; + } + + /** + * 경향신문 링크에서 고유 ID 추출 + * + * @param link 기사 링크 + * @return 추출된 고유 ID + */ + private String extractUniqueIdFromLink(String link) { + if (link == null) { + return String.valueOf(System.currentTimeMillis()); + } + + try { + String[] parts = link.split("/"); + for (int i = 0; i < parts.length; i++) { + if ("article".equals(parts[i]) && i + 1 < parts.length) { + return parts[i + 1]; + } + } + } catch 
(Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); + } + + return String.valueOf(System.currentTimeMillis()); + } + + /** + * 발행일 추출, 경향신문은 dc:date 태그도 확인 + * + * @param entry RSS 항목 + * @return 발행일 LocalDateTime + */ + @Override + protected LocalDateTime extractPubDate(SyndEntry entry) { + if (entry.getPublishedDate() != null) { + return convertToLocalDateTime(entry.getPublishedDate()); + } + + return extractDcDate(entry); + } + + /** + * Dublin Core date 태그에서 발행일 추출 + * + * @param entry RSS 항목 + * @return 추출된 발행일, 없으면 현재 시간 + */ + private LocalDateTime extractDcDate(SyndEntry entry) { + return entry.getForeignMarkup().stream() + .filter(element -> "date".equals(element.getName()) && + "dc".equals(element.getNamespacePrefix())) + .findFirst() + .map(element -> parseDateTime(element.getValue())) + .orElse(LocalDateTime.now()); + } + + /** + * 문자열을 LocalDateTime으로 파싱 + * + * @param dateString 날짜 문자열 + * @return 파싱된 LocalDateTime, 실패 시 현재 시간 + */ + private LocalDateTime parseDateTime(String dateString) { + try { + return LocalDateTime.parse(dateString); + } catch (Exception e) { + return LocalDateTime.now(); + } + } + + /** + * 카테고리 enum 정보 추출 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @return 결합된 카테고리 문자열 + */ + @Override + protected String extractCategory(SyndEntry entry, RssSource source) { + return source.getCategoryName(); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java new file mode 100644 index 0000000..6f56310 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java @@ -0,0 +1,64 @@ +package 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import com.rometools.rome.feed.synd.SyndEntry; + +import org.springframework.stereotype.Component; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * 국민일보 RSS 매퍼 구현체 + * + * @author 양병학 + * @since 2025-05-10 최초 작성 + * @modified 2025-05-13 AbstractRssMapper 상속 구조로 변경 + */ +@Component +public class KmibRssMapper extends AbstractRssMapper { + + private static final Pattern ARCID_PATTERN = Pattern.compile("arcid=([0-9]+)"); + + /** + * 매퍼 타입 반환 + * + * @return 매퍼 타입 (km) + */ + @Override + public String getMapperType() { + return "km"; + } + + /** + * GUID 추출, 링크에서 arcid를 추출하여 생성 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @return 형식: [언론사코드][arcid] + */ + @Override + protected String extractGuid(SyndEntry entry, RssSource source) { + String arcId = extractArcIdFromLink(entry.getLink()); + return source.getCodePrefix() + arcId; + } + + /** + * 링크에서 arcid 값 추출 + * + * @param link 기사 링크 + * @return 추출된 arcid, 없으면 링크 그대로 반환 + */ + private String extractArcIdFromLink(String link) { + if (link == null) { + return ""; + } + + Matcher matcher = ARCID_PATTERN.matcher(link); + if (matcher.find()) { + return matcher.group(1); + } + return link; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java new file mode 100644 index 0000000..79bb31a --- /dev/null +++ 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java @@ -0,0 +1,68 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.writer; + +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import org.springframework.batch.item.Chunk; +import org.springframework.batch.item.ItemWriter; +import org.springframework.dao.DataIntegrityViolationException; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.RssNewsRepository; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * 기사 데이터를 DB에 저장하는 Spring Batch ItemWriter 구현체. + * 중복된 링크는 저장하지 않으며, 새롭게 저장된 기사 수를 로그로 출력한다. + * + * - 입력: 기사 리스트(List) + * - 처리: 중복 여부 확인 후 저장 + * - 출력: 로그 출력 (중복 제외) + * + * @since 2025-05-10 + */ +@Slf4j +@Component +@RequiredArgsConstructor +public class ArticleWriter implements ItemWriter> { + + private final RssNewsRepository rssNewsRepository; + + /** + * 기사 리스트를 저장하며, 중복된 기사는 건너뛴다. + * 저장 성공 시 개수를 집계하고, 로그로 남긴다. + * + * @param chunk Spring Batch가 전달하는 기사 리스트 Chunk + * @since 2025-05-10 + * @author 함예정 + */ + @Override + public void write(Chunk> chunk) { + AtomicInteger savedCount = new AtomicInteger(); + chunk.getItems().stream() + .flatMap(List::stream) + .filter(item -> !rssNewsRepository.existsByLink(item.getLink())) + .forEach(item -> {saveItem(item, savedCount);}); + log.info("새로 저장된 뉴스 개수: {}", savedCount.get()); + } + + /*** + * DB에 뉴스를 저장하고, 저장된 개수를 증가시킵니다. 
+ * + * @param item 저장할 뉴스 + * @param savedCount 저장된 갯수 + * @author 함예정 + * @since 2025-05-12 + */ + private void saveItem(ArticleEntity item, AtomicInteger savedCount) { + try { + rssNewsRepository.save(item); + savedCount.incrementAndGet(); + } catch (DataIntegrityViolationException e) { + log.debug("중복 항목 감지: {}", item.getLink()); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/entity/RssNews.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java similarity index 80% rename from src/main/java/com/likelion/backendplus4/talkpick/batch/rss/entity/RssNews.java rename to src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java index 3263f3f..9d8e612 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/entity/RssNews.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java @@ -1,4 +1,4 @@ -package com.likelion.backendplus4.talkpick.batch.rss.entity; +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity; import jakarta.persistence.*; import lombok.*; @@ -17,14 +17,14 @@ * @EqualsAndHashCode 지정으로 갹채 비교 최적화 */ @Entity -@Table(name = "rss", uniqueConstraints = @UniqueConstraint(columnNames = {"link"})) +@Table(name = "article", uniqueConstraints = @UniqueConstraint(columnNames = {"link"})) @Getter @NoArgsConstructor @AllArgsConstructor @Builder @ToString(exclude = "description") @EqualsAndHashCode(of = "id") -public class RssNews { +public class ArticleEntity { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @@ -61,4 +61,9 @@ public class RssNews { protected void onCreate() { createdAt = LocalDateTime.now(); } + + public String getDescription(){ + return description != null ? 
description : ""; + } + } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java new file mode 100644 index 0000000..a4b327a --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java @@ -0,0 +1,12 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.stereotype.Repository; + +@Repository +public interface RssNewsRepository extends JpaRepository { + + boolean existsByLink(String link); +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/presentation/controller/ArticleCollectorController.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/presentation/controller/ArticleCollectorController.java new file mode 100644 index 0000000..59c369c --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/presentation/controller/ArticleCollectorController.java @@ -0,0 +1,51 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.presentation.controller; + +import static com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse.*; + +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.DeleteMapping; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import 
com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse; +import com.likelion.backendplus4.talkpick.batch.news.article.application.port.in.ArticleCollectorUseCase; +import com.likelion.backendplus4.talkpick.batch.news.article.application.service.dto.ArticleCollectorStatusResponse; + +import lombok.RequiredArgsConstructor; + +/** + * 뉴스 수집 스케줄러 컨트롤러. + * 수집기 실행 및 정지를 위한 API 엔드포인트를 제공한다. + * 내부적으로 {@link ArticleCollectorUseCase}를 호출하여 작업을 위임한다. + */ +@RestController +@RequiredArgsConstructor +@RequestMapping("/news/collector") +public class ArticleCollectorController { + private final ArticleCollectorUseCase articleCollectorUsecase; + + /** + * 뉴스 RSS 수집을 시작한다. + * + * @return 수집기 상태 응답 (실행 여부 및 메시지 포함) + * @since 2025-05-10 + * @author 함예정 + */ + @PostMapping("/start") + public ResponseEntity> start() { + return success(articleCollectorUsecase.start()); + } + + /** + * 뉴스 RSS 수집을 정지한다. + * + * @return 수집기 상태 응답 (정지 여부 및 메시지 포함) + * @since 2025-05-10 + * @author 함예정 + */ + @DeleteMapping("/stop") + public ResponseEntity> stop() { + return success(articleCollectorUsecase.stop()); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssErrorCode.java deleted file mode 100644 index d790111..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssErrorCode.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.rss.exception; - -import lombok.Getter; -import lombok.RequiredArgsConstructor; - -/** - * RSS 관련 오류 코드를 정의하는 열거형 - * - * @author 양병학 - * @since 2025-05-10 - */ -@Getter -@RequiredArgsConstructor -public enum RssErrorCode { - - // 일반 오류 - UNKNOWN_ERROR("RSS-E001", "알 수 없는 오류가 발생했습니다."), - - // RSS 정보 로드 관련 오류 - FEED_CONNECTION_ERROR("RSS-E101", "RSS 피드 연결 중 오류가 발생했습니다."), - FEED_PARSING_ERROR("RSS-E102", "RSS 피드 파싱 중 오류가 발생했습니다."), - FEED_TIMEOUT_ERROR("RSS-E103", "RSS 
피드 로드 중 시간 초과가 발생했습니다."), - - // Mapper 관련 오류 - MAPPER_NOT_FOUND("RSS-E201", "요청한 매퍼를 찾을 수 없습니다."), - ITEM_MAPPING_ERROR("RSS-E202", "RSS 항목 매핑 중 오류가 발생했습니다."), - - // 데이터베이스 관련 오류 - DB_SAVE_ERROR("RSS-E301", "RSS 뉴스를 저장하는 중 오류가 발생했습니다."), - DUPLICATE_LINK_ERROR("RSS-E302", "이미 존재하는 링크입니다."), - - // 스케줄러 관련 오류 - SCHEDULER_EXECUTION_ERROR("RSS-E401", "스케줄러 실행 중 오류가 발생했습니다."); - - private final String code; - private final String message; -} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssException.java deleted file mode 100644 index 2885adc..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/exception/RssException.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.rss.exception; - -import lombok.Getter; - -/** - * RSS 예외처리 클래스 - * - * @author 양병학 - * @since 2025-05-10 - */ -@Getter -public class RssException extends RuntimeException { - - private final RssErrorCode errorCode; - - public RssException(RssErrorCode errorCode) { - super(errorCode.getMessage()); - this.errorCode = errorCode; - } - - /** - * 상세 메시지 생성자 - * - * @param errorCode 오류 코드 - * @param message 상세 메시지 - */ - public RssException(RssErrorCode errorCode, String message) { - super(message); - this.errorCode = errorCode; - } - - /** - * 원인 예외 생성자 - * - * @param errorCode 오류 코드 - * @param cause 원인 예외 - */ - public RssException(RssErrorCode errorCode, Throwable cause) { - super(errorCode.getMessage(), cause); - this.errorCode = errorCode; - } - - /** - * 상세 메시지및 원인 예외 생성자 - * - * @param errorCode 오류 코드 - * @param message 상세 메시지 - * @param cause 원인 예외 - */ - public RssException(RssErrorCode errorCode, String message, Throwable cause) { - super(message, cause); - this.errorCode = errorCode; - } - - /** - * 오류 코드와 메시지를 포함한 문자열 반환 - */ - @Override - public String toString() { - return 
String.format("[%s] %s", errorCode.getCode(), getMessage()); - } -} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/model/RssSource.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/model/RssSource.java deleted file mode 100644 index 73e0339..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/model/RssSource.java +++ /dev/null @@ -1,84 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.rss.model; - -import lombok.Getter; - -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -/** - * RSS 뉴스 소스와 URL을 정의하는 열거형 - * 각 항목은 언론사, 카테고리, URL 정보를 포함 - * - * @author 양병학 - * @since 2025-05-10 - */ -@Getter -public enum RssSource { - // 국민일보 RSS 피드 - KMIB_POLITICS("국민일보", "정치", "https://www.kmib.co.kr/rss/data/kmibPolRss.xml", "km", true), - KMIB_ECONOMY("국민일보", "경제", "https://www.kmib.co.kr/rss/data/kmibEcoRss.xml", "km", true), - KMIB_SOCIETY("국민일보", "사회", "https://www.kmib.co.kr/rss/data/kmibSocRss.xml", "km", true), - KMIB_INTERNATIONAL("국민일보", "국제", "https://www.kmib.co.kr/rss/data/kmibIntRss.xml", "km", true), - KMIB_ENTERTAINMENT("국민일보", "연예", "https://www.kmib.co.kr/rss/data/kmibEntRss.xml", "km", true), - KMIB_SPORTS("국민일보", "스포츠", "https://www.kmib.co.kr/rss/data/kmibSpoRss.xml", "km", true), - - // 동아일보 RSS 피드 - 현재 비활성화 - DONGA_TOTAL("동아일보", "전체", "https://rss.donga.com/total.xml", "da", false), - - // 경향신문 RSS 피드 - 현재 비활성화 - KHAN_TOTAL("경향신문", "전체", "https://www.khan.co.kr/rss/rssdata/total_news.xml", "kh", false); - - private final String publisherName; - private final String categoryName; - private final String url; - private final String mapperType; - private final boolean enabled; - - RssSource(String publisherName, String categoryName, String url, String mapperType, boolean enabled) { - this.publisherName = publisherName; - this.categoryName = categoryName; - this.url = url; - this.mapperType = mapperType; - this.enabled = 
enabled; - } - - /** - * 언론사 이름과 카테고리를 결합한 표시 이름 반환 - */ - public String getDisplayName() { - return publisherName + "-" + categoryName; - } - - /** - * 매퍼 타입 키 반환 - */ - public String getMapperType() { - return mapperType; - } - - /** - * 언론사 코드 접두사 반환 (대문자) - */ - public String getCodePrefix() { - return mapperType.toUpperCase(); - } - - /** - * 활성화된 모든 소스 반환 - */ - public static List getEnabledSources() { - return Arrays.stream(values()) - .filter(RssSource::isEnabled) - .collect(Collectors.toList()); - } - - /** - * 특정 언론사의 모든 소스 반환 - */ - public static List getSourcesByPublisher(String publisherName) { - return Arrays.stream(values()) - .filter(source -> source.getPublisherName().equals(publisherName)) - .collect(Collectors.toList()); - } -} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/repository/RssNewsRepository.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/repository/RssNewsRepository.java deleted file mode 100644 index 965740a..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/repository/RssNewsRepository.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.rss.repository; - -import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.stereotype.Repository; - -@Repository -public interface RssNewsRepository extends JpaRepository { - - boolean existsByLink(String link); -} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/scheduler/RssScheduler.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/scheduler/RssScheduler.java deleted file mode 100644 index 26c142c..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/scheduler/RssScheduler.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.rss.scheduler; - -import 
com.likelion.backendplus4.talkpick.batch.rss.exception.RssErrorCode; -import com.likelion.backendplus4.talkpick.batch.rss.exception.RssException; -import com.likelion.backendplus4.talkpick.batch.rss.service.RssService; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.scheduling.annotation.EnableScheduling; -import org.springframework.scheduling.annotation.Scheduled; -import org.springframework.stereotype.Component; - -/** - * RSS 피드 수집 작업을 스케줄링하는 클래스 - * - * @author 양병학 - * @since 2025-05-10 최초 작성 - * @modify 2025-05-10 cron 표현식을 설정 파일로 분리하여 유연성 개선 - */ -@Component -@EnableScheduling -@RequiredArgsConstructor -@Slf4j -public class RssScheduler { - - private final RssService rssService; - - /** - * RSS 피드를 정기적으로 수집하는 스케줄 작업 - * application.yml의 rss.scheduler.cron 속성으로 실행 주기 설정 - * 설정이 없을 경우 기본값으로 1분마다 실행 - * - * 1. 서비스를 거쳐서 피드 수집 - * 2. 처리된 항목 수 로깅 - * 3. 예외 발생 시 오류 로깅하고 다음 스케쥴까지 대기 - * - * @since 2025-05-10 최초 작성 - * @modify 2025-05-10 cron 표현식을 application으로 분리 - */ - @Scheduled(cron = "${rss.scheduler.cron:0 */1 * * * ?}") - public void scheduleRssFeedFetch() { - log.info("Quartz 정상시작"); - - try { - int processedItems = rssService.fetchAndProcessAllFeeds(); - log.info("Rss 피드 입력 Processed {} items", processedItems); - } catch (RssException e) { - // 커스텀 예외 처리 - log.error("[{}] 스케줄러 실행 오류: {}", e.getErrorCode().getCode(), e.getMessage(), e); - } catch (Exception e) { - // 기타 예외 처리 - log.error("[{}] 스케줄러 실행 중 예상치 못한 오류: {}", - RssErrorCode.SCHEDULER_EXECUTION_ERROR.getCode(), e.getMessage(), e); - } - } -} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssMappingFactory.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssMappingFactory.java deleted file mode 100644 index 94e1db9..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssMappingFactory.java +++ /dev/null @@ -1,43 +0,0 @@ -package 
com.likelion.backendplus4.talkpick.batch.rss.service; - -import com.likelion.backendplus4.talkpick.batch.rss.service.mapper.RssMapper; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Component; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -@Component -public class RssMappingFactory { - - private final Map mappers = new HashMap<>(); - - /** - * 모든 RssMapper 구현체를 자동으로 주입받아 맵에 등록 - * - * @param availableMappers RssMapper 구현체 목록 - */ - @Autowired - public RssMappingFactory(List availableMappers) { - for (RssMapper mapper : availableMappers) { - String mapperType = mapper.getMapperType(); - mappers.put(mapperType, mapper); - } - } - - /** - * 타입에 맞게 mapper 반환 - * - * @param type 매퍼 타입 (소문자 언론사 코드) - * @return 해당 타입의 RSS 매퍼 - * @throws IllegalArgumentException 지원하지 않는 타입인 경우 - */ - public RssMapper getMapper(String type) { - RssMapper mapper = mappers.get(type); - if (mapper == null) { - throw new IllegalArgumentException("Mapper 없음: " + type); - } - return mapper; - } -} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssService.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssService.java deleted file mode 100644 index 8e36940..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/RssService.java +++ /dev/null @@ -1,154 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.rss.service; - -import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; -import com.likelion.backendplus4.talkpick.batch.rss.exception.RssErrorCode; -import com.likelion.backendplus4.talkpick.batch.rss.exception.RssException; -import com.likelion.backendplus4.talkpick.batch.rss.model.RssSource; -import com.likelion.backendplus4.talkpick.batch.rss.repository.RssNewsRepository; -import com.likelion.backendplus4.talkpick.batch.rss.service.mapper.RssMapper; -import 
com.rometools.rome.feed.synd.SyndEntry; -import com.rometools.rome.feed.synd.SyndFeed; -import com.rometools.rome.io.SyndFeedInput; -import com.rometools.rome.io.XmlReader; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.dao.DataIntegrityViolationException; -import org.springframework.stereotype.Service; -import org.springframework.transaction.annotation.Transactional; - -import java.net.URL; -import java.util.ArrayList; -import java.util.List; -import java.util.function.Predicate; -import java.util.stream.Collectors; - -import static java.util.function.Predicate.not; - -/** - * RSS 피드를 수집하고 처리하는 서비스 클래스 - * - * @author 양병학 - * @since 2025-05-10 최초 작성 - */ -@Service -@RequiredArgsConstructor -@Slf4j -public class RssService { - - private final RssNewsRepository rssNewsRepository; - private final RssMappingFactory rssMappingFactory; - - /** - * 모든 활성화된 RSS 뉴스 피드를 가져와 처리 - * - * 1. 활성화된 모든 RSS 소스 조회 - * 2. 각 소스에서 RSS 피드 가져오기 - * 3. 수집된 모든 뉴스 항목 병합 - * 4. 
데이터베이스에 저장 (중복 제외) - * - * @return 새로 저장된 뉴스 항목 수 - * @since 2025-05-10 최초 작성 - * @author 양병학 - */ - @Transactional - public int fetchAndProcessAllFeeds() { - List allNewsItems = new ArrayList<>(); - - RssSource.getEnabledSources().forEach(source -> { - try { - List items = fetchFeedFromSource(source); - allNewsItems.addAll(items); - } catch (Exception e) { - log.error("Error: 뉴스피드 fetch중 {}-{}: {}", - source.getPublisherName(), source.getCategoryName(), - e.getMessage(), e); - } - }); - - return saveItems(allNewsItems); - } - - /** - * 특정 소스에서 RSS 피드를 가져옴 - * - * @param source RSS 소스 정보 - * @return 변환된 RssNews 엔티티 목록 - * @throws Exception RSS 피드 로드 및 처리 중 발생할 수 있는 예외 - */ - private List fetchFeedFromSource(RssSource source) { - List newsItems = new ArrayList<>(); - try { - URL feedUrl = new URL(source.getUrl()); - SyndFeedInput input = new SyndFeedInput(); - SyndFeed feed; - - try { - feed = input.build(new XmlReader(feedUrl)); - } catch (Exception e) { - throw new RssException(RssErrorCode.FEED_PARSING_ERROR, - "피드 파싱 실패: " + source.getDisplayName(), e); - } - - RssMapper mapper; - try { - mapper = rssMappingFactory.getMapper(source.getMapperType()); - } catch (IllegalArgumentException e) { - throw new RssException(RssErrorCode.MAPPER_NOT_FOUND, - "매퍼 없음: " + source.getMapperType(), e); - } - - for (SyndEntry entry : feed.getEntries()) { - try { - RssNews newsItem = mapper.mapToRssNews(entry, source); - newsItems.add(newsItem); - } catch (Exception e) { - log.warn("[{}] 항목 변환 실패: {} - {}", - RssErrorCode.ITEM_MAPPING_ERROR.getCode(), - source.getDisplayName(), e.getMessage()); - // 한 항목 실패해도 계속 진행 - } - } - - log.info("페치완료 {} 갯수 뉴스피드 {}-{} 에서", - newsItems.size(), source.getPublisherName(), source.getCategoryName()); - } catch (RssException e) { - // 이미 래핑된 RssException은 그대로 로그 - log.error("[{}] 피드 로드 실패: {} - {}", - e.getErrorCode().getCode(), source.getDisplayName(), e.getMessage()); - } catch (Exception e) { - // 다른 예외는 RssException으로 래핑 - log.error("[{}] 피드 
로드 실패: {} - {}", - RssErrorCode.FEED_CONNECTION_ERROR.getCode(), - source.getDisplayName(), e.getMessage(), e); - } - return newsItems; - } - - /** - * 수집된 뉴스 항목을 DB에 저장 - * - * @param newsItems 저장할 뉴스 항목 목록 - * @return 새로 저장된 항목 수 - */ - @Transactional - public int saveItems(List newsItems) { - int savedCount = 0; - - List newItems = newsItems.stream() - .filter(not(item -> rssNewsRepository.existsByLink(item.getLink()))) - .collect(Collectors.toList()); - - for (RssNews item : newItems) { - try { - rssNewsRepository.save(item); - savedCount++; - } catch (DataIntegrityViolationException e) { - log.debug("중복 항목 감지: {}", item.getLink()); - } - } - - log.info("{}개 뉴스 저장완료 (새로운 항목: {}, 총 가져온 항목: {})", - savedCount, newItems.size(), newsItems.size()); - return savedCount; - } -} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/DongaRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/DongaRssMapper.java deleted file mode 100644 index 588390a..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/DongaRssMapper.java +++ /dev/null @@ -1,54 +0,0 @@ -//package com.likelion.backendplus4.talkpick.batch.rss.service.mapper; -// -//import com.rometools.rome.feed.synd.SyndCategory; -//import com.rometools.rome.feed.synd.SyndEntry; -//import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; -//import org.springframework.stereotype.Component; -// -//import java.time.LocalDateTime; -//import java.time.ZoneId; -//import java.util.Date; -//import java.util.stream.Collectors; -// -//@Component -//public class DongaRssMapper implements RssMapper { -// -// /* -// SyndEntry는 Rome 라이브러리에서 RSS 항목 나타내는 객체 -// -// getTitle(): 제목 반환 -// getLink(): 링크 반환 -// getPublishedDate(): 발행일 반환 -// getDescription(): 설명(요약) 반환 -// getCategories(): 카테고리 목록 반환 -// getUri(): 항목의 고유 식별자(제공사 고유번호 2자리 + guid) 반환 -// (예시: KM0028082827 [KM: 국민일보]) -// 몇몇 피드들은 url에서 
추출해서 사용 -// -// getForeignMarkup(): RSS 2.0 기본 태그 외의 확장 태그(Dublin Core 등) 접근 -// -// 필요하면 객체 수정해서 사용, CustomEntry -// */ -// -// @Override -// public RssNews mapToRssNews(SyndEntry entry) { -// // category -// String category = entry.getCategories().stream() -// .map(SyndCategory::getName) -// .collect(Collectors.joining(", ")); -// -// return RssNews.builder() -// .title(entry.getTitle()) -// .link(entry.getLink()) -// .pubDate(convertToLocalDateTime(entry.getPublishedDate())) -// .category(category) -// .guid(entry.getUri()) // URI를 GUID로 사용 -// .build(); -// } -// -// private LocalDateTime convertToLocalDateTime(Date date) { -// return date != null -// ? date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime() -// : LocalDateTime.now(); -// } -//} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KhanRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KhanRssMapper.java deleted file mode 100644 index 21891e6..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KhanRssMapper.java +++ /dev/null @@ -1,55 +0,0 @@ -//package com.likelion.backendplus4.talkpick.batch.rss.service.mapper; -// -//import com.rometools.rome.feed.synd.SyndCategory; -//import com.rometools.rome.feed.synd.SyndEntry; -//import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; -//import org.springframework.stereotype.Component; -// -//import java.time.LocalDateTime; -//import java.time.ZoneId; -//import java.util.Date; -//import java.util.stream.Collectors; -// -//@Component -//public class KhanRssMapper implements RssMapper { -// -// @Override -// public RssNews mapToRssNews(SyndEntry entry) { -// // 경향신문 dc:date를 사용 -// LocalDateTime pubDate; -// if (entry.getPublishedDate() != null) { -// pubDate = convertToLocalDateTime(entry.getPublishedDate()); -// } else { -// pubDate = entry.getForeignMarkup().stream() -// .filter(element -> 
"date".equals(element.getName()) && -// "dc".equals(element.getNamespacePrefix())) -// .findFirst() -// .map(element -> { -// try { -// return LocalDateTime.parse(element.getValue()); -// } catch (Exception e) { -// return LocalDateTime.now(); -// } -// }) -// .orElse(LocalDateTime.now()); -// } -// -// String category = entry.getCategories().stream() -// .map(SyndCategory::getName) -// .collect(Collectors.joining(", ")); -// -// return RssNews.builder() -// .title(entry.getTitle()) -// .link(entry.getLink()) -// .pubDate(pubDate) -// .category(category) -// .guid(entry.getUri()) -// .build(); -// } -// -// private LocalDateTime convertToLocalDateTime(Date date) { -// return date != null -// ? date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime() -// : LocalDateTime.now(); -// } -//} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KmibRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KmibRssMapper.java deleted file mode 100644 index 8848ea3..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/KmibRssMapper.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.rss.service.mapper; - -import com.likelion.backendplus4.talkpick.batch.rss.model.RssSource; -import com.rometools.rome.feed.synd.SyndCategory; -import com.rometools.rome.feed.synd.SyndEntry; -import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; -import org.springframework.stereotype.Component; - -import java.time.LocalDateTime; -import java.time.ZoneId; -import java.util.Date; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -@Component -public class KmibRssMapper implements RssMapper { - - private static final Pattern ARCID_PATTERN = Pattern.compile("arcid=([0-9]+)"); - - @Override - public RssNews mapToRssNews(SyndEntry entry, RssSource source) { - 
String arcId = extractArcIdFromLink(entry.getLink()); - String guid = source.getCodePrefix() + arcId; - - String description = ""; - if (entry.getDescription() != null) { - description = entry.getDescription().getValue(); - } - - return RssNews.builder() - .title(entry.getTitle()) - .link(entry.getLink()) - .pubDate(convertToLocalDateTime(entry.getPublishedDate())) - .category(source.getCategoryName()) // Enum에서 직접 카테고리 이름 가져옴 - .guid(guid) - .description(description) - .isSummary(false) - .build(); - } - - @Override - public String getMapperType() { - return "km"; - } - - private String extractArcIdFromLink(String link) { - if (link == null) return ""; - - Matcher matcher = ARCID_PATTERN.matcher(link); - if (matcher.find()) { - return matcher.group(1); - } - return link; - } - - private LocalDateTime convertToLocalDateTime(Date date) { - return null != date - ? date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime() - : LocalDateTime.now(); - } -} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/RssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/RssMapper.java deleted file mode 100644 index 5c5f871..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/rss/service/mapper/RssMapper.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.rss.service.mapper; - -import com.likelion.backendplus4.talkpick.batch.rss.entity.RssNews; -import com.likelion.backendplus4.talkpick.batch.rss.model.RssSource; -import com.rometools.rome.feed.synd.SyndEntry; - -/** - * RSS 항목을 RssNews 엔티티로 변환하는 매퍼 인터페이스 - * - * @author 양병학 - * @since 2025-05-10 최초 작성 - */ -public interface RssMapper { - /** - * RSS 피드 항목을 RssNews 엔티티로 변환합니다. 
- * - * @param entry 변환할 SyndEntry 객체(rss2.0 구조 지원) - * @param source RSS 소스 정보 - * @return 변환된 RssNews 엔티티 - */ - RssNews mapToRssNews(SyndEntry entry, RssSource source); - - /** - * 매퍼 타입을 반환 - * - * @return 매퍼 타입 (소문자 코드) - */ - String getMapperType(); -} \ No newline at end of file diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 4c91452..0c8069c 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -2,7 +2,7 @@ server: port: ${WEB_PORT:8082} error: whitelabel: - enabled: false + enabled: false spring: ai: @@ -24,6 +24,19 @@ spring: hibernate: format_sql: true open-in-view: false + web: + resources: + add-mappings: false + batch: + job: + enabled: false + jdbc: + initialize-schema: never + quartz: + job-store-type: jdbc + jdbc: + initialize-schema: never + log: rolling: directory: logs @@ -31,10 +44,11 @@ log: pattern: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - [TraceId: %X{traceId:-no-trace}] - %msg%n" max-history: 30 total-size-cap: 10MB - web: - resources: - add-mappings: false decorator: datasource: p6spy: - enable-logging: true + enable-logging: false + +article-collector: + quartz: + cron: "0 */1 * * * ?" 
\ No newline at end of file diff --git a/src/main/resources/batch/schema-mysql.sql b/src/main/resources/batch/schema-mysql.sql new file mode 100644 index 0000000..197ef3f --- /dev/null +++ b/src/main/resources/batch/schema-mysql.sql @@ -0,0 +1,98 @@ +-- Autogenerated: do not edit this file + +CREATE TABLE BATCH_JOB_INSTANCE ( + JOB_INSTANCE_ID BIGINT NOT NULL PRIMARY KEY , + VERSION BIGINT , + JOB_NAME VARCHAR(100) NOT NULL, + JOB_KEY VARCHAR(32) NOT NULL, + constraint JOB_INST_UN unique (JOB_NAME, JOB_KEY) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_JOB_EXECUTION ( + JOB_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY , + VERSION BIGINT , + JOB_INSTANCE_ID BIGINT NOT NULL, + CREATE_TIME DATETIME(6) NOT NULL, + START_TIME DATETIME(6) DEFAULT NULL , + END_TIME DATETIME(6) DEFAULT NULL , + STATUS VARCHAR(10) , + EXIT_CODE VARCHAR(2500) , + EXIT_MESSAGE VARCHAR(2500) , + LAST_UPDATED DATETIME(6), + constraint JOB_INST_EXEC_FK foreign key (JOB_INSTANCE_ID) + references BATCH_JOB_INSTANCE(JOB_INSTANCE_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_JOB_EXECUTION_PARAMS ( + JOB_EXECUTION_ID BIGINT NOT NULL , + PARAMETER_NAME VARCHAR(100) NOT NULL , + PARAMETER_TYPE VARCHAR(100) NOT NULL , + PARAMETER_VALUE VARCHAR(2500) , + IDENTIFYING CHAR(1) NOT NULL , + constraint JOB_EXEC_PARAMS_FK foreign key (JOB_EXECUTION_ID) + references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_STEP_EXECUTION ( + STEP_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY , + VERSION BIGINT NOT NULL, + STEP_NAME VARCHAR(100) NOT NULL, + JOB_EXECUTION_ID BIGINT NOT NULL, + CREATE_TIME DATETIME(6) NOT NULL, + START_TIME DATETIME(6) DEFAULT NULL , + END_TIME DATETIME(6) DEFAULT NULL , + STATUS VARCHAR(10) , + COMMIT_COUNT BIGINT , + READ_COUNT BIGINT , + FILTER_COUNT BIGINT , + WRITE_COUNT BIGINT , + READ_SKIP_COUNT BIGINT , + WRITE_SKIP_COUNT BIGINT , + PROCESS_SKIP_COUNT BIGINT , + ROLLBACK_COUNT BIGINT , + EXIT_CODE VARCHAR(2500) , + EXIT_MESSAGE VARCHAR(2500) , + LAST_UPDATED 
DATETIME(6), + constraint JOB_EXEC_STEP_FK foreign key (JOB_EXECUTION_ID) + references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_STEP_EXECUTION_CONTEXT ( + STEP_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY, + SHORT_CONTEXT VARCHAR(2500) NOT NULL, + SERIALIZED_CONTEXT TEXT , + constraint STEP_EXEC_CTX_FK foreign key (STEP_EXECUTION_ID) + references BATCH_STEP_EXECUTION(STEP_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_JOB_EXECUTION_CONTEXT ( + JOB_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY, + SHORT_CONTEXT VARCHAR(2500) NOT NULL, + SERIALIZED_CONTEXT TEXT , + constraint JOB_EXEC_CTX_FK foreign key (JOB_EXECUTION_ID) + references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_STEP_EXECUTION_SEQ ( + ID BIGINT NOT NULL, + UNIQUE_KEY CHAR(1) NOT NULL, + constraint UNIQUE_KEY_UN unique (UNIQUE_KEY) +) ENGINE=InnoDB; + +INSERT INTO BATCH_STEP_EXECUTION_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_STEP_EXECUTION_SEQ); + +CREATE TABLE BATCH_JOB_EXECUTION_SEQ ( + ID BIGINT NOT NULL, + UNIQUE_KEY CHAR(1) NOT NULL, + constraint UNIQUE_KEY_UN unique (UNIQUE_KEY) +) ENGINE=InnoDB; + +INSERT INTO BATCH_JOB_EXECUTION_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_JOB_EXECUTION_SEQ); + +CREATE TABLE BATCH_JOB_SEQ ( + ID BIGINT NOT NULL, + UNIQUE_KEY CHAR(1) NOT NULL, + constraint UNIQUE_KEY_UN unique (UNIQUE_KEY) +) ENGINE=InnoDB; + +INSERT INTO BATCH_JOB_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_JOB_SEQ); diff --git a/src/main/resources/quartz/tables_mysql_innodb.sql b/src/main/resources/quartz/tables_mysql_innodb.sql new file mode 100644 index 0000000..8968c23 --- /dev/null +++ b/src/main/resources/quartz/tables_mysql_innodb.sql @@ -0,0 +1,179 @@ +# +# In your Quartz properties file, you'll need to set 
+# org.quartz.jobStore.driverDelegateClass = org.quartz.impl.jdbcjobstore.StdJDBCDelegate +# +# +# By: Ron Cordell - roncordell +# I didn't see this anywhere, so I thought I'd post it here. This is the script from Quartz to create the tables in a MySQL database, modified to use INNODB instead of MYISAM. + +DROP TABLE IF EXISTS QRTZ_FIRED_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_PAUSED_TRIGGER_GRPS; +DROP TABLE IF EXISTS QRTZ_SCHEDULER_STATE; +DROP TABLE IF EXISTS QRTZ_LOCKS; +DROP TABLE IF EXISTS QRTZ_SIMPLE_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_SIMPROP_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_CRON_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_BLOB_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_JOB_DETAILS; +DROP TABLE IF EXISTS QRTZ_CALENDARS; + +CREATE TABLE QRTZ_JOB_DETAILS( +SCHED_NAME VARCHAR(120) NOT NULL, +JOB_NAME VARCHAR(190) NOT NULL, +JOB_GROUP VARCHAR(190) NOT NULL, +DESCRIPTION VARCHAR(250) NULL, +JOB_CLASS_NAME VARCHAR(250) NOT NULL, +IS_DURABLE VARCHAR(1) NOT NULL, +IS_NONCONCURRENT VARCHAR(1) NOT NULL, +IS_UPDATE_DATA VARCHAR(1) NOT NULL, +REQUESTS_RECOVERY VARCHAR(1) NOT NULL, +JOB_DATA BLOB NULL, +PRIMARY KEY (SCHED_NAME,JOB_NAME,JOB_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_TRIGGERS ( +SCHED_NAME VARCHAR(120) NOT NULL, +TRIGGER_NAME VARCHAR(190) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +JOB_NAME VARCHAR(190) NOT NULL, +JOB_GROUP VARCHAR(190) NOT NULL, +DESCRIPTION VARCHAR(250) NULL, +NEXT_FIRE_TIME BIGINT(13) NULL, +PREV_FIRE_TIME BIGINT(13) NULL, +PRIORITY INTEGER NULL, +TRIGGER_STATE VARCHAR(16) NOT NULL, +TRIGGER_TYPE VARCHAR(8) NOT NULL, +START_TIME BIGINT(13) NOT NULL, +END_TIME BIGINT(13) NULL, +CALENDAR_NAME VARCHAR(190) NULL, +MISFIRE_INSTR SMALLINT(2) NULL, +JOB_DATA BLOB NULL, +PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP), +FOREIGN KEY (SCHED_NAME,JOB_NAME,JOB_GROUP) +REFERENCES QRTZ_JOB_DETAILS(SCHED_NAME,JOB_NAME,JOB_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_SIMPLE_TRIGGERS ( +SCHED_NAME VARCHAR(120) NOT 
NULL, +TRIGGER_NAME VARCHAR(190) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +REPEAT_COUNT BIGINT(7) NOT NULL, +REPEAT_INTERVAL BIGINT(12) NOT NULL, +TIMES_TRIGGERED BIGINT(10) NOT NULL, +PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP), +FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP) +REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_CRON_TRIGGERS ( +SCHED_NAME VARCHAR(120) NOT NULL, +TRIGGER_NAME VARCHAR(190) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +CRON_EXPRESSION VARCHAR(120) NOT NULL, +TIME_ZONE_ID VARCHAR(80), +PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP), +FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP) +REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_SIMPROP_TRIGGERS + ( + SCHED_NAME VARCHAR(120) NOT NULL, + TRIGGER_NAME VARCHAR(190) NOT NULL, + TRIGGER_GROUP VARCHAR(190) NOT NULL, + STR_PROP_1 VARCHAR(512) NULL, + STR_PROP_2 VARCHAR(512) NULL, + STR_PROP_3 VARCHAR(512) NULL, + INT_PROP_1 INT NULL, + INT_PROP_2 INT NULL, + LONG_PROP_1 BIGINT NULL, + LONG_PROP_2 BIGINT NULL, + DEC_PROP_1 NUMERIC(13,4) NULL, + DEC_PROP_2 NUMERIC(13,4) NULL, + BOOL_PROP_1 VARCHAR(1) NULL, + BOOL_PROP_2 VARCHAR(1) NULL, + PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP), + FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP) + REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_BLOB_TRIGGERS ( +SCHED_NAME VARCHAR(120) NOT NULL, +TRIGGER_NAME VARCHAR(190) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +BLOB_DATA BLOB NULL, +PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP), +INDEX (SCHED_NAME,TRIGGER_NAME, TRIGGER_GROUP), +FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP) +REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_CALENDARS ( +SCHED_NAME VARCHAR(120) NOT NULL, +CALENDAR_NAME VARCHAR(190) NOT NULL, +CALENDAR BLOB NOT NULL, 
+PRIMARY KEY (SCHED_NAME,CALENDAR_NAME)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_PAUSED_TRIGGER_GRPS ( +SCHED_NAME VARCHAR(120) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +PRIMARY KEY (SCHED_NAME,TRIGGER_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_FIRED_TRIGGERS ( +SCHED_NAME VARCHAR(120) NOT NULL, +ENTRY_ID VARCHAR(95) NOT NULL, +TRIGGER_NAME VARCHAR(190) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +INSTANCE_NAME VARCHAR(190) NOT NULL, +FIRED_TIME BIGINT(13) NOT NULL, +SCHED_TIME BIGINT(13) NOT NULL, +PRIORITY INTEGER NOT NULL, +STATE VARCHAR(16) NOT NULL, +JOB_NAME VARCHAR(190) NULL, +JOB_GROUP VARCHAR(190) NULL, +IS_NONCONCURRENT VARCHAR(1) NULL, +REQUESTS_RECOVERY VARCHAR(1) NULL, +PRIMARY KEY (SCHED_NAME,ENTRY_ID)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_SCHEDULER_STATE ( +SCHED_NAME VARCHAR(120) NOT NULL, +INSTANCE_NAME VARCHAR(190) NOT NULL, +LAST_CHECKIN_TIME BIGINT(13) NOT NULL, +CHECKIN_INTERVAL BIGINT(13) NOT NULL, +PRIMARY KEY (SCHED_NAME,INSTANCE_NAME)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_LOCKS ( +SCHED_NAME VARCHAR(120) NOT NULL, +LOCK_NAME VARCHAR(40) NOT NULL, +PRIMARY KEY (SCHED_NAME,LOCK_NAME)) +ENGINE=InnoDB; + +CREATE INDEX IDX_QRTZ_J_REQ_RECOVERY ON QRTZ_JOB_DETAILS(SCHED_NAME,REQUESTS_RECOVERY); +CREATE INDEX IDX_QRTZ_J_GRP ON QRTZ_JOB_DETAILS(SCHED_NAME,JOB_GROUP); + +CREATE INDEX IDX_QRTZ_T_J ON QRTZ_TRIGGERS(SCHED_NAME,JOB_NAME,JOB_GROUP); +CREATE INDEX IDX_QRTZ_T_JG ON QRTZ_TRIGGERS(SCHED_NAME,JOB_GROUP); +CREATE INDEX IDX_QRTZ_T_C ON QRTZ_TRIGGERS(SCHED_NAME,CALENDAR_NAME); +CREATE INDEX IDX_QRTZ_T_G ON QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_GROUP); +CREATE INDEX IDX_QRTZ_T_STATE ON QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_STATE); +CREATE INDEX IDX_QRTZ_T_N_STATE ON QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP,TRIGGER_STATE); +CREATE INDEX IDX_QRTZ_T_N_G_STATE ON QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_GROUP,TRIGGER_STATE); +CREATE INDEX IDX_QRTZ_T_NEXT_FIRE_TIME ON QRTZ_TRIGGERS(SCHED_NAME,NEXT_FIRE_TIME); +CREATE INDEX IDX_QRTZ_T_NFT_ST ON 
QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_STATE,NEXT_FIRE_TIME); +CREATE INDEX IDX_QRTZ_T_NFT_MISFIRE ON QRTZ_TRIGGERS(SCHED_NAME,MISFIRE_INSTR,NEXT_FIRE_TIME); +CREATE INDEX IDX_QRTZ_T_NFT_ST_MISFIRE ON QRTZ_TRIGGERS(SCHED_NAME,MISFIRE_INSTR,NEXT_FIRE_TIME,TRIGGER_STATE); +CREATE INDEX IDX_QRTZ_T_NFT_ST_MISFIRE_GRP ON QRTZ_TRIGGERS(SCHED_NAME,MISFIRE_INSTR,NEXT_FIRE_TIME,TRIGGER_GROUP,TRIGGER_STATE); + +CREATE INDEX IDX_QRTZ_FT_TRIG_INST_NAME ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,INSTANCE_NAME); +CREATE INDEX IDX_QRTZ_FT_INST_JOB_REQ_RCVRY ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,INSTANCE_NAME,REQUESTS_RECOVERY); +CREATE INDEX IDX_QRTZ_FT_J_G ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,JOB_NAME,JOB_GROUP); +CREATE INDEX IDX_QRTZ_FT_JG ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,JOB_GROUP); +CREATE INDEX IDX_QRTZ_FT_T_G ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP); +CREATE INDEX IDX_QRTZ_FT_TG ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,TRIGGER_GROUP); + +commit; From ad7f9d01e3c55ac55e58f65bfeef32fc45b96e3d Mon Sep 17 00:00:00 2001 From: JUNG ANSIK Date: Wed, 14 May 2025 10:01:15 +0900 Subject: [PATCH 15/36] =?UTF-8?q?=E2=9C=A8=C2=A0=20Feature:=20#54-?= =?UTF-8?q?=EB=89=B4=EC=8A=A4-=EB=8D=B0=EC=9D=B4=ED=84=B0-=EC=88=98?= =?UTF-8?q?=EC=A7=91-=ED=8F=AC=ED=8A=B8-=EA=B7=9C=EA=B2=A9-=EA=B0=9C?= =?UTF-8?q?=EB=B0=9C=20(#55)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨  Feature/#54-뉴스-데이터-수집-포트-규격-개발 * ♻️ Refactor: Port 메서드 수정 * ♻️ Refactor: 도메인 객체 내부 Category필드 추가 --- .../port/out/NewsInfoProviderPort.java | 9 +++++++++ .../batch/index/domain/model/NewsInfo.java | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java diff --git 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java new file mode 100644 index 0000000..97fc87e --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java @@ -0,0 +1,9 @@ +package com.likelion.backendplus4.talkpick.batch.index.application.port.out; + +import java.util.List; + +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; + +public interface NewsInfoProviderPort { + List fetchAll(); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java new file mode 100644 index 0000000..9091523 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java @@ -0,0 +1,17 @@ +package com.likelion.backendplus4.talkpick.batch.index.domain.model; + +import java.time.LocalDateTime; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +@Getter +public class NewsInfo{ + private final String newsId; + private final String title; + private final String content; + private final LocalDateTime publishedAt; + private final String imageUrl; + private final String category; + } From 05293a26099ebf864d07c06d6ae3d69c09b8ee34 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Wed, 14 May 2025 10:04:10 +0900 Subject: [PATCH 16/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20docker=20?= =?UTF-8?q?=EB=B0=B0=ED=8F=AC=20=EC=9D=B4=EC=8A=88=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/batch-dev-deploy.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/batch-dev-deploy.yml 
b/.github/workflows/batch-dev-deploy.yml index 8bb496d..f3e11f9 100644 --- a/.github/workflows/batch-dev-deploy.yml +++ b/.github/workflows/batch-dev-deploy.yml @@ -38,6 +38,4 @@ jobs: - name: Restart Docker run: | cd /deploy - docker-compose stop batch - docker-compose rm -f batch - docker-compose up -d --build batch + docker-compose up -d --build --force-recreate batch From b7dccd43ba1d797c0ced0b17095451b5af03b25d Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Thu, 15 May 2025 15:46:07 +0900 Subject: [PATCH 17/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20application.yml?= =?UTF-8?q?=20=EC=9B=B9=20=ED=8F=AC=ED=8A=B8=20=EC=84=A4=EC=A0=95=20?= =?UTF-8?q?=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/resources/application.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 0c8069c..ca0293e 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -1,5 +1,5 @@ server: - port: ${WEB_PORT:8082} + port: ${BATCH_WEB_PORT:8082} error: whitelabel: enabled: false @@ -51,4 +51,4 @@ decorator: article-collector: quartz: - cron: "0 */1 * * * ?" \ No newline at end of file + cron: "0 */1 * * * ?" 
From 54cb30f62120ac54f72c67c622c422ca69a27a92 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Thu, 15 May 2025 18:37:09 +0900 Subject: [PATCH 18/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#59=20=EB=89=B4?= =?UTF-8?q?=EC=8A=A4=20=EC=A0=95=EB=B3=B4=20=EC=A0=9C=EA=B3=B5=20=ED=8F=AC?= =?UTF-8?q?=ED=8A=B8=20=EC=96=B4=EB=8C=91=ED=84=B0=20=EA=B5=AC=ED=98=84=20?= =?UTF-8?q?(#60)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ♻️ Refactor: RssNewsRepository 이름 변경 -> NewsInfoJpaRepository * ✨ Feature: ArticleEntityMapper 추가 * ✨ Feature: 뉴스 정보 제공 어댑터 구현 * 💄Style: 주석 위치 이동 * ✨ Feature: 데이터 가져오는 샘플 코드 추가 * Update src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java ♻️ Refactor: 뉴스카운트 매직넘버 분리 --------- Co-authored-by: Atriel <118334518+Atriel1999@users.noreply.github.com> --- .../batch/index/domain/model/NewsInfo.java | 2 + .../collector/writer/ArticleWriter.java | 8 +-- .../jpa/adapter/NewsInfoProviderAdapter.java | 49 +++++++++++++++++++ .../jpa/mapper/ArticleEntityMapper.java | 17 +++++++ ...sitory.java => NewsInfoJpaRepository.java} | 2 +- .../NewsInfoProviderPortSample.java | 31 ++++++++++++ 6 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java rename src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/{RssNewsRepository.java => NewsInfoJpaRepository.java} (81%) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java index 9091523..81550e9 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java @@ -2,11 +2,13 @@ import java.time.LocalDateTime; +import lombok.Builder; import lombok.Getter; import lombok.RequiredArgsConstructor; @RequiredArgsConstructor @Getter +@Builder public class NewsInfo{ private final String newsId; private final String title; diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java index 79bb31a..e66ccc3 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java @@ -9,7 +9,7 @@ import org.springframework.stereotype.Component; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; -import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.RssNewsRepository; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -29,7 +29,7 @@ @RequiredArgsConstructor public class ArticleWriter implements ItemWriter> { - private final RssNewsRepository rssNewsRepository; + private final NewsInfoJpaRepository newsInfoJpaRepository; /** * 기사 리스트를 저장하며, 중복된 기사는 건너뛴다. 
@@ -44,7 +44,7 @@ public void write(Chunk> chunk) { AtomicInteger savedCount = new AtomicInteger(); chunk.getItems().stream() .flatMap(List::stream) - .filter(item -> !rssNewsRepository.existsByLink(item.getLink())) + .filter(item -> !newsInfoJpaRepository.existsByLink(item.getLink())) .forEach(item -> {saveItem(item, savedCount);}); log.info("새로 저장된 뉴스 개수: {}", savedCount.get()); } @@ -59,7 +59,7 @@ public void write(Chunk> chunk) { */ private void saveItem(ArticleEntity item, AtomicInteger savedCount) { try { - rssNewsRepository.save(item); + newsInfoJpaRepository.save(item); savedCount.incrementAndGet(); } catch (DataIntegrityViolationException e) { log.debug("중복 항목 감지: {}", item.getLink()); diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java new file mode 100644 index 0000000..57fe50b --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java @@ -0,0 +1,49 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.adapter; + +import java.util.List; + +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; +import org.springframework.data.domain.Sort; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.mapper.ArticleEntityMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; + +import lombok.RequiredArgsConstructor; + +/** + * TODO: 이벤트 기반으로 색인 안된 뉴스만 제공하도록 수정 필요 + * 
NewsInfoProviderPort 인터페이스의 구현체로, + * JPA 리포지토리를 통해 뉴스 정보를 조회하는 어댑터 클래스입니다. + * 현재는 최근 100개 뉴스를 반환합니다. + * @since 2025-05-14 + */ +@Component +@RequiredArgsConstructor +public class NewsInfoProviderAdapter implements NewsInfoProviderPort { + private final NewsInfoJpaRepository newsInfoJpaRepository; + + /** + * 뉴스 정보를 최신순으로 최대 100건까지 조회하여 도메인 객체 리스트로 반환합니다. + * + * @return 뉴스 도메인 객체 리스트 + * @author 함예정 + * @since 2025-05-14 + */ + @Override + public List fetchAll() { + private static final int MAX_NEWS_COUNT = 100; + + Pageable pageable = PageRequest.of(0, MAX_NEWS_COUNT) + .withSort(Sort.by("pubDate").descending()); + + return newsInfoJpaRepository.findAll(pageable) + .getContent() + .stream() + .map(ArticleEntityMapper::toDomainFromEntity) + .toList(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java new file mode 100644 index 0000000..6cbb9bc --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java @@ -0,0 +1,17 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.mapper; + +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +public class ArticleEntityMapper { + public static NewsInfo toDomainFromEntity(ArticleEntity articleEntity) { + return NewsInfo.builder() + .newsId(articleEntity.getGuid()) + .title(articleEntity.getTitle()) + .content(articleEntity.getDescription()) + .publishedAt(articleEntity.getPubDate()) + .imageUrl(null) // TODO: 나중에 추가 예정 + .category(articleEntity.getCategory()) + .build(); + } +} diff --git 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java similarity index 81% rename from src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java rename to src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java index a4b327a..e62b420 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java @@ -6,7 +6,7 @@ import org.springframework.stereotype.Repository; @Repository -public interface RssNewsRepository extends JpaRepository { +public interface NewsInfoJpaRepository extends JpaRepository { boolean existsByLink(String link); } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java new file mode 100644 index 0000000..23270cf --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java @@ -0,0 +1,31 @@ +package com.likelion.backendplus4.talkpick.batch.sample.index.providerPort; + +import static com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse.*; + +import java.util.List; + +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import 
com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse; +import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; + +import lombok.RequiredArgsConstructor; + +@RestController +@RequiredArgsConstructor +@RequestMapping("/sample/news/info") +public class NewsInfoProviderPortSample { + private final NewsInfoProviderPort newsInfoProviderPort; + + /** + * 실제 사용시에는 Response 객체로 변환 필요 + */ + @GetMapping + public ResponseEntity>> fetchAll() { + return success(newsInfoProviderPort.fetchAll()); + } +} From 838be4bed86303bef9fc8929c4d1af04735f6c71 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Thu, 15 May 2025 18:38:56 +0900 Subject: [PATCH 19/36] =?UTF-8?q?=F0=9F=94=80=20Revert:=20"Feature:=20#59?= =?UTF-8?q?=20=EB=89=B4=EC=8A=A4=20=EC=A0=95=EB=B3=B4=20=EC=A0=9C=EA=B3=B5?= =?UTF-8?q?=20=ED=8F=AC=ED=8A=B8=20=EC=96=B4=EB=8C=91=ED=84=B0=20=EA=B5=AC?= =?UTF-8?q?=ED=98=84=20(#60)"=20(#63)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 54cb30f62120ac54f72c67c622c422ca69a27a92. 
--- .../batch/index/domain/model/NewsInfo.java | 2 - .../collector/writer/ArticleWriter.java | 8 +-- .../jpa/adapter/NewsInfoProviderAdapter.java | 49 ------------------- .../jpa/mapper/ArticleEntityMapper.java | 17 ------- ...Repository.java => RssNewsRepository.java} | 2 +- .../NewsInfoProviderPortSample.java | 31 ------------ 6 files changed, 5 insertions(+), 104 deletions(-) delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java rename src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/{NewsInfoJpaRepository.java => RssNewsRepository.java} (81%) delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java index 81550e9..9091523 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java @@ -2,13 +2,11 @@ import java.time.LocalDateTime; -import lombok.Builder; import lombok.Getter; import lombok.RequiredArgsConstructor; @RequiredArgsConstructor @Getter -@Builder public class NewsInfo{ private final String newsId; private final String title; diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java index e66ccc3..79bb31a 100644 --- 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java @@ -9,7 +9,7 @@ import org.springframework.stereotype.Component; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; -import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.RssNewsRepository; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -29,7 +29,7 @@ @RequiredArgsConstructor public class ArticleWriter implements ItemWriter> { - private final NewsInfoJpaRepository newsInfoJpaRepository; + private final RssNewsRepository rssNewsRepository; /** * 기사 리스트를 저장하며, 중복된 기사는 건너뛴다. @@ -44,7 +44,7 @@ public void write(Chunk> chunk) { AtomicInteger savedCount = new AtomicInteger(); chunk.getItems().stream() .flatMap(List::stream) - .filter(item -> !newsInfoJpaRepository.existsByLink(item.getLink())) + .filter(item -> !rssNewsRepository.existsByLink(item.getLink())) .forEach(item -> {saveItem(item, savedCount);}); log.info("새로 저장된 뉴스 개수: {}", savedCount.get()); } @@ -59,7 +59,7 @@ public void write(Chunk> chunk) { */ private void saveItem(ArticleEntity item, AtomicInteger savedCount) { try { - newsInfoJpaRepository.save(item); + rssNewsRepository.save(item); savedCount.incrementAndGet(); } catch (DataIntegrityViolationException e) { log.debug("중복 항목 감지: {}", item.getLink()); diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java deleted file mode 100644 index 57fe50b..0000000 --- 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.adapter; - -import java.util.List; - -import org.springframework.data.domain.PageRequest; -import org.springframework.data.domain.Pageable; -import org.springframework.data.domain.Sort; -import org.springframework.stereotype.Component; - -import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; -import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; -import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.mapper.ArticleEntityMapper; -import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; - -import lombok.RequiredArgsConstructor; - -/** - * TODO: 이벤트 기반으로 색인 안된 뉴스만 제공하도록 수정 필요 - * NewsInfoProviderPort 인터페이스의 구현체로, - * JPA 리포지토리를 통해 뉴스 정보를 조회하는 어댑터 클래스입니다. - * 현재는 최근 100개 뉴스를 반환합니다. - * @since 2025-05-14 - */ -@Component -@RequiredArgsConstructor -public class NewsInfoProviderAdapter implements NewsInfoProviderPort { - private final NewsInfoJpaRepository newsInfoJpaRepository; - - /** - * 뉴스 정보를 최신순으로 최대 100건까지 조회하여 도메인 객체 리스트로 반환합니다. 
- * - * @return 뉴스 도메인 객체 리스트 - * @author 함예정 - * @since 2025-05-14 - */ - @Override - public List fetchAll() { - private static final int MAX_NEWS_COUNT = 100; - - Pageable pageable = PageRequest.of(0, MAX_NEWS_COUNT) - .withSort(Sort.by("pubDate").descending()); - - return newsInfoJpaRepository.findAll(pageable) - .getContent() - .stream() - .map(ArticleEntityMapper::toDomainFromEntity) - .toList(); - } -} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java deleted file mode 100644 index 6cbb9bc..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.mapper; - -import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; -import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; - -public class ArticleEntityMapper { - public static NewsInfo toDomainFromEntity(ArticleEntity articleEntity) { - return NewsInfo.builder() - .newsId(articleEntity.getGuid()) - .title(articleEntity.getTitle()) - .content(articleEntity.getDescription()) - .publishedAt(articleEntity.getPubDate()) - .imageUrl(null) // TODO: 나중에 추가 예정 - .category(articleEntity.getCategory()) - .build(); - } -} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java similarity index 81% rename from src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java rename to 
src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java index e62b420..a4b327a 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java @@ -6,7 +6,7 @@ import org.springframework.stereotype.Repository; @Repository -public interface NewsInfoJpaRepository extends JpaRepository { +public interface RssNewsRepository extends JpaRepository { boolean existsByLink(String link); } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java deleted file mode 100644 index 23270cf..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.sample.index.providerPort; - -import static com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse.*; - -import java.util.List; - -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RestController; - -import com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse; -import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; -import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; - -import lombok.RequiredArgsConstructor; - -@RestController -@RequiredArgsConstructor -@RequestMapping("/sample/news/info") -public class NewsInfoProviderPortSample { - private final 
NewsInfoProviderPort newsInfoProviderPort; - - /** - * 실제 사용시에는 Response 객체로 변환 필요 - */ - @GetMapping - public ResponseEntity>> fetchAll() { - return success(newsInfoProviderPort.fetchAll()); - } -} From 0fd29b8cc3fcf75d9e57fb926015f9f5f1c07f8b Mon Sep 17 00:00:00 2001 From: Atriel <118334518+Atriel1999@users.noreply.github.com> Date: Thu, 15 May 2025 18:39:58 +0900 Subject: [PATCH 20/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#56=20=EB=89=B4?= =?UTF-8?q?=EC=8A=A4=EB=B3=B8=EB=AC=B8=20=EC=8A=A4=ED=81=AC=EB=9E=98?= =?UTF-8?q?=ED=95=91=EA=B8=B0=EB=8A=A5=20=EA=B0=9C=EB=B0=9C=20(#57)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ Feat: 스크래핑 인터페이스 구현 및 스크래핑 기능해서 관련 Mapper 수정 * ♻️ Refactor: 엔티티 구조 변경 (isSummary -> Summary 대체, imgurl 추가), 관련 Mapper 기능 구현 * 📦 Chore: Jsoup 의존성 추가 (1.20.1) * 🐛 Fix: 변수 입력값, @Override 관련 코드 수정 * 🐛 Fix: 생성자 관련 이슈 수정 * ✨ Feat: 스크래퍼 1차구현 * 🐛 Fix: 아직 구현하지않은 매핑정보 주석처리 * 🐛 Fix: 언론사별 extractUniqueIdFromLink 메서드 예외처리 통일 * ♻️ Refactor: Mapper, Scrapper 코드 구조 수정, 구현 기능 개선 * ♻️Refactor: 스크래핑 부분 리팩토링 및 CSS, SSL인증서 등등 필요없는 데이터 filter 추가 (받는 데이터량 감소) * 📝Docs: 메서드 내 주석제거 * ✨ Feat: 본문 데이터 직렬화 처리 * ♻️Refactor: Rss 데이터 수집 구조 변경 (중복 데이터 전처리 과정 추가), 필요없는 로그 정리 * ♻️ Refactor: 코드 리팩토링, 오타 및 구조수정(책임분리) --- build.gradle | 4 +- .../collector/config/batch/RssSource.java | 104 ++++++-- .../config/quartz/QuartzTriggerConfig.java | 68 ++--- .../processor/RssEntryProcessor.java | 4 +- .../collector/processor/RssFeedReader.java | 223 ++++++++++------ .../reader/RssSourcePartitioner.java | 191 +++++++------- .../support/mapper/AbstractRssMapper.java | 237 ++++++++++++++++-- .../mapper/implement/DongaRssMapper.java | 74 +++++- .../mapper/implement/KhanRssMapper.java | 188 ++++++++++++-- .../mapper/implement/KmibRssMapper.java | 117 ++++++++- .../support/scraper/ContentScraper.java | 74 ++++++ .../scraper/factory/ScraperFactory.java | 46 ++++ .../implement/DongaContentScraper.java | 131 ++++++++++ 
.../scraper/implement/KhanContentScraper.java | 203 +++++++++++++++ .../scraper/util/HtmlScraperUtils.java | 203 +++++++++++++++ .../jpa/entity/ArticleEntity.java | 12 +- .../jpa/repository/RssNewsRepository.java | 13 + 17 files changed, 1614 insertions(+), 278 deletions(-) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/util/HtmlScraperUtils.java diff --git a/build.gradle b/build.gradle index 77c6217..115adc7 100644 --- a/build.gradle +++ b/build.gradle @@ -49,7 +49,7 @@ dependencies { implementation 'org.springframework.boot:spring-boot-starter-data-elasticsearch' implementation 'org.elasticsearch.client:elasticsearch-rest-high-level-client:7.17.10' - // OpenAi + //OpenAi implementation 'org.springframework.ai:spring-ai-openai-spring-boot-starter:1.0.0-M5' //Swagger @@ -61,6 +61,8 @@ dependencies { //RSS implementation 'org.springframework.boot:spring-boot-starter-quartz' implementation 'com.rometools:rome:1.18.0' + implementation 'org.jsoup:jsoup:1.20.1' + } tasks.named('test') { diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java index a82e1d5..8e5c3cc 100644 --- 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java @@ -2,8 +2,7 @@ import lombok.Getter; -import java.util.Arrays; -import java.util.List; +import java.util.*; import java.util.stream.Collectors; /** @@ -11,47 +10,83 @@ * 각 항목은 언론사, 카테고리, URL 정보를 포함 * * @author 양병학 - * @since 2025-05-10 * @modified 2025-05-12 표준 카테고리(NewsCategory) 도입 및 동아일보, 경향신문 카테고리별 피드 추가 + * @since 2025-05-10 */ @Getter public enum RssSource { // 국민일보 RSS 피드 - KMIB_POLITICS("국민일보", NewsCategory.POLITICS, "https://www.kmib.co.kr/rss/data/kmibPolRss.xml", "km", true), - KMIB_ECONOMY("국민일보", NewsCategory.ECONOMY, "https://www.kmib.co.kr/rss/data/kmibEcoRss.xml", "km", true), - KMIB_SOCIETY("국민일보", NewsCategory.SOCIETY, "https://www.kmib.co.kr/rss/data/kmibSocRss.xml", "km", true), - KMIB_INTERNATIONAL("국민일보", NewsCategory.INTERNATIONAL, "https://www.kmib.co.kr/rss/data/kmibIntRss.xml", "km", true), - KMIB_ENTERTAINMENT("국민일보", NewsCategory.ENTERTAINMENT, "https://www.kmib.co.kr/rss/data/kmibEntRss.xml", "km", true), - KMIB_SPORTS("국민일보", NewsCategory.SPORTS, "https://www.kmib.co.kr/rss/data/kmibSpoRss.xml", "km", true), + KMIB_POLITICS("국민일보", NewsCategory.POLITICS, "https://www.kmib.co.kr/rss/data/kmibPolRss.xml", "km", true, true), + KMIB_ECONOMY("국민일보", NewsCategory.ECONOMY, "https://www.kmib.co.kr/rss/data/kmibEcoRss.xml", "km", true, true), + KMIB_SOCIETY("국민일보", NewsCategory.SOCIETY, "https://www.kmib.co.kr/rss/data/kmibSocRss.xml", "km", true, true), + KMIB_INTERNATIONAL("국민일보", NewsCategory.INTERNATIONAL, "https://www.kmib.co.kr/rss/data/kmibIntRss.xml", "km", true, true), + KMIB_ENTERTAINMENT("국민일보", NewsCategory.ENTERTAINMENT, "https://www.kmib.co.kr/rss/data/kmibEntRss.xml", "km", true, true), + KMIB_SPORTS("국민일보", NewsCategory.SPORTS, "https://www.kmib.co.kr/rss/data/kmibSpoRss.xml", "km", 
true, true), // 동아일보 RSS 피드 - DONGA_POLITICS("동아일보", NewsCategory.POLITICS, "https://rss.donga.com/politics.xml", "da", true), - DONGA_ECONOMY("동아일보", NewsCategory.ECONOMY, "https://rss.donga.com/economy.xml", "da", true), - DONGA_SOCIETY("동아일보", NewsCategory.SOCIETY, "https://rss.donga.com/national.xml", "da", true), - DONGA_INTERNATIONAL("동아일보", NewsCategory.INTERNATIONAL, "https://rss.donga.com/international.xml", "da", true), - DONGA_ENTERTAINMENT("동아일보", NewsCategory.ENTERTAINMENT, "https://rss.donga.com/entertainment.xml", "da", true), - DONGA_SPORTS("동아일보", NewsCategory.SPORTS, "https://rss.donga.com/sports.xml", "da", true), + DONGA_POLITICS("동아일보", NewsCategory.POLITICS, "https://rss.donga.com/politics.xml", "da", true, false), + DONGA_ECONOMY("동아일보", NewsCategory.ECONOMY, "https://rss.donga.com/economy.xml", "da", true, false), + DONGA_SOCIETY("동아일보", NewsCategory.SOCIETY, "https://rss.donga.com/national.xml", "da", true, false), + DONGA_INTERNATIONAL("동아일보", NewsCategory.INTERNATIONAL, "https://rss.donga.com/international.xml", "da", true, false), + DONGA_ENTERTAINMENT("동아일보", NewsCategory.ENTERTAINMENT, "https://rss.donga.com/entertainment.xml", "da", true, false), + DONGA_SPORTS("동아일보", NewsCategory.SPORTS, "https://rss.donga.com/sports.xml", "da", true, false), // 경향신문 RSS 피드 - KHAN_POLITICS("경향신문", NewsCategory.POLITICS, "https://www.khan.co.kr/rss/rssdata/politic_news.xml", "kh", true), - KHAN_ECONOMY("경향신문", NewsCategory.ECONOMY, "https://www.khan.co.kr/rss/rssdata/economy_news.xml", "kh", true), - KHAN_SOCIETY("경향신문", NewsCategory.SOCIETY, "https://www.khan.co.kr/rss/rssdata/society_news.xml", "kh", true), - KHAN_INTERNATIONAL("경향신문", NewsCategory.INTERNATIONAL, "https://www.khan.co.kr/rss/rssdata/world_news.xml", "kh", true), - KHAN_ENTERTAINMENT("경향신문", NewsCategory.ENTERTAINMENT, "https://www.khan.co.kr/rss/rssdata/art_news.xml", "kh", true), - KHAN_SPORTS("경향신문", NewsCategory.SPORTS, "https://www.khan.co.kr/rss/rssdata/sports_news.xml", "kh", 
true); + KHAN_POLITICS("경향신문", NewsCategory.POLITICS, "https://www.khan.co.kr/rss/rssdata/politic_news.xml", "kh", true, false), + KHAN_ECONOMY("경향신문", NewsCategory.ECONOMY, "https://www.khan.co.kr/rss/rssdata/economy_news.xml", "kh", true, false), + KHAN_SOCIETY("경향신문", NewsCategory.SOCIETY, "https://www.khan.co.kr/rss/rssdata/society_news.xml", "kh", true, false), + KHAN_INTERNATIONAL("경향신문", NewsCategory.INTERNATIONAL, "https://www.khan.co.kr/rss/rssdata/world_news.xml", "kh", true, false), + KHAN_ENTERTAINMENT("경향신문", NewsCategory.ENTERTAINMENT, "https://www.khan.co.kr/rss/rssdata/art_news.xml", "kh", true, false), + KHAN_SPORTS("경향신문", NewsCategory.SPORTS, "https://www.khan.co.kr/rss/rssdata/sports_news.xml", "kh", true, false); + + /* + // MBN RSS 피드 + MBN_POLITICS("MBN", NewsCategory.POLITICS, "https://www.mbn.co.kr/rss/politics/", "mb", true,false), + MBN_ECONOMY("MBN", NewsCategory.ECONOMY, "https://www.mbn.co.kr/rss/economy/", "mb", true,false), + MBN_SOCIETY("MBN", NewsCategory.SOCIETY, "https://www.mbn.co.kr/rss/society/", "mb", true,false), + MBN_INTERNATIONAL("MBN", NewsCategory.INTERNATIONAL, "https://www.mbn.co.kr/rss/international/", "mb", true,false), + MBN_ENTERTAINMENT("MBN", NewsCategory.ENTERTAINMENT, "https://www.mbn.co.kr/rss/enter/", "mb", true,false), + MBN_SPORTS("MBN", NewsCategory.SPORTS, "https://www.mbn.co.kr/rss/sports/", "mb", true,false), + + // 조선일보 RSS 피드 + CHOSUN_POLITICS("조선일보", NewsCategory.POLITICS, "https://www.chosun.com/arc/outboundfeeds/rss/category/politics/?outputType=xml", "cs", true,false), + CHOSUN_ECONOMY("조선일보", NewsCategory.ECONOMY, "https://www.chosun.com/arc/outboundfeeds/rss/category/economy/?outputType=xml", "cs", true,false), + CHOSUN_SOCIETY("조선일보", NewsCategory.SOCIETY, "https://www.chosun.com/arc/outboundfeeds/rss/category/national/?outputType=xml", "cs", true,false), + CHOSUN_INTERNATIONAL("조선일보", NewsCategory.INTERNATIONAL, 
"https://www.chosun.com/arc/outboundfeeds/rss/category/international/?outputType=xml", "cs", true,false), + CHOSUN_ENTERTAINMENT("조선일보", NewsCategory.ENTERTAINMENT, "https://www.chosun.com/arc/outboundfeeds/rss/category/entertainments/?outputType=xml", "cs", true,false), + CHOSUN_SPORTS("조선일보", NewsCategory.SPORTS, "https://www.chosun.com/arc/outboundfeeds/rss/category/sports/?outputType=xml", "cs", true,false), + + // 한겨레 RSS 피드 + HANI_POLITICS("한겨레", NewsCategory.POLITICS, "https://www.hani.co.kr/rss/politics/", "hn", true,false), + HANI_ECONOMY("한겨레", NewsCategory.ECONOMY, "https://www.hani.co.kr/rss/economy/", "hn", true,false), + HANI_SOCIETY("한겨레", NewsCategory.SOCIETY, "https://www.hani.co.kr/rss/society/", "hn", true,false), + HANI_INTERNATIONAL("한겨레", NewsCategory.INTERNATIONAL, "https://www.hani.co.kr/rss/international/", "hn", true,false), + HANI_ENTERTAINMENT("한겨레", NewsCategory.ENTERTAINMENT, "https://www.hani.co.kr/rss/culture/", "hn", true,false), + HANI_SPORTS("한겨레", NewsCategory.SPORTS, "https://www.hani.co.kr/rss/sports/", "hn", true,false), + + // 한국경제 RSS 피드 + HANKYUNG_POLITICS("한국경제", NewsCategory.POLITICS, "https://www.hankyung.com/feed/politics", "hk", true,false), + HANKYUNG_ECONOMY("한국경제", NewsCategory.ECONOMY, "https://www.hankyung.com/feed/economy", "hk", true,false), + HANKYUNG_SOCIETY("한국경제", NewsCategory.SOCIETY, "https://www.hankyung.com/feed/society", "hk", true,false), + HANKYUNG_INTERNATIONAL("한국경제", NewsCategory.INTERNATIONAL, "https://www.hankyung.com/feed/international", "hk", true,false), + HANKYUNG_ENTERTAINMENT("한국경제", NewsCategory.ENTERTAINMENT, "https://www.hankyung.com/feed/entertainment", "hk", true,false), + HANKYUNG_SPORTS("한국경제", NewsCategory.SPORTS, "https://www.hankyung.com/feed/sports", "hk", true,false); + */ private final String publisherName; private final NewsCategory category; private final String url; private final String mapperType; private final boolean enabled; + private final boolean hasFullContent; - 
RssSource(String publisherName, NewsCategory category, String url, String mapperType, boolean enabled) { + RssSource(String publisherName, NewsCategory category, String url, String mapperType, boolean enabled, boolean hasFullContent) { this.publisherName = publisherName; this.category = category; this.url = url; this.mapperType = mapperType; this.enabled = enabled; + this.hasFullContent = hasFullContent; } /** @@ -93,6 +128,31 @@ public static List getEnabledSources() { .collect(Collectors.toList()); } + /** + * RSS에 전체 내용 포함 여부 반환 + * + * @return 전체 내용 포함 여부 + */ + public boolean hasFullContent() { + return hasFullContent; + } + + /** + * 활성화된 RSS 소스 목록에서 매퍼 타입(언론사)별로 하나만 선택하여 반환 + * + * @return 중복 제거된 RSS 소스 목록(언론사당 하나) + */ + public static List getUniqueMapperSources() { + Map uniqueSources = new HashMap<>(); + + for (RssSource source : getEnabledSources()) { + String mapperType = source.getMapperType(); + uniqueSources.putIfAbsent(mapperType, source); + } + + return new ArrayList<>(uniqueSources.values()); + } + /** * 특정 언론사의 모든 소스 반환 */ diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java index b59a4cc..17577b7 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java @@ -10,40 +10,40 @@ @Configuration public class QuartzTriggerConfig { - private final String cronExpression; - private final JobDetail articleCollectorJobDetail; - private final String articleCollectorJobDetailName = "articleCollectorJobDetail"; + private final String cronExpression; + private final JobDetail articleCollectorJobDetail; + private final 
String articleCollectorJobDetailName = "articleCollectorJobDetail"; - /** - * 생성자 주입을 통해 Cron 표현식을 설정한다. - * - * @param cronExpression RSS 배치 실행 주기를 정의하는 Cron 표현식 - * application.yml에서 article-collector.quartz.cron 값을 로드 합니다. - * @author 함예정 - * @since 2025-05-10 - */ - public QuartzTriggerConfig(@Value("${article-collector.quartz.cron}") String cronExpression, - JobDetail articleCollectorJobDetail) { - this.cronExpression = cronExpression; - this.articleCollectorJobDetail = articleCollectorJobDetail; - } + /** + * 생성자 주입을 통해 Cron 표현식을 설정한다. + * + * @param cronExpression RSS 배치 실행 주기를 정의하는 Cron 표현식 + * application.yml에서 article-collector.quartz.cron 값을 로드 합니다. + * @author 함예정 + * @since 2025-05-10 + */ + public QuartzTriggerConfig(@Value("${article-collector.quartz.cron}") String cronExpression, + JobDetail articleCollectorJobDetail) { + this.cronExpression = cronExpression; + this.articleCollectorJobDetail = articleCollectorJobDetail; + } - /** - * RSS 수집 Quartz Trigger 빈 등록. - * - forJob: 이 Trigger 가 어떤 Quartz Job 과 연관되어 실행될지를 지정 - * - withIdentity: Scheduler 내에서 이 Trigger 를 고유하게 식별하기 위한 이름 지정 - * - withSchedule: Cron 표현식을 사용하여 실행 주기 설정 - * - * @return RSS 배치 작업용 Trigger 객체 - * @author 함예정 - * @since 2025-05-10 - */ - @Bean - public Trigger rssBatchTrigger() { - return TriggerBuilder.newTrigger() - .forJob(articleCollectorJobDetail) - .withIdentity(articleCollectorJobDetailName) - .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression)) - .build(); - } + /** + * RSS 수집 Quartz Trigger 빈 등록. 
+ * - forJob: 이 Trigger 가 어떤 Quartz Job 과 연관되어 실행될지를 지정 + * - withIdentity: Scheduler 내에서 이 Trigger 를 고유하게 식별하기 위한 이름 지정 + * - withSchedule: Cron 표현식을 사용하여 실행 주기 설정 + * + * @return RSS 배치 작업용 Trigger 객체 + * @author 함예정 + * @since 2025-05-10 + */ + @Bean + public Trigger rssBatchTrigger() { + return TriggerBuilder.newTrigger() + .forJob(articleCollectorJobDetail) + .withIdentity(articleCollectorJobDetailName) + .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression)) + .build(); + } } diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java index c51130a..7468f15 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java @@ -53,6 +53,7 @@ public RssEntryProcessor(RssFeedReader rssFeedReader, RssMappingFactory mappingF public List process(RssSource source) { List rssParseResult = parseRss(source); AbstractRssMapper mapper = getMapper(source); + return buildArticleEntityList(source, rssParseResult, mapper); } @@ -63,9 +64,10 @@ public List process(RssSource source) { * @return 파싱된 RSS 엔트리 리스트 * @since 2025-05-10 * @author 함예정 + * @modified 2025-05-18 매퍼 타입 전달하도록 수정 */ private List parseRss(RssSource source) { - return rssFeedReader.getFeed(source.getUrl()); + return rssFeedReader.getFeed(source.getUrl(), source.getMapperType()); } /** diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java index 045ac1c..2c05603 100644 --- 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java @@ -4,93 +4,176 @@ import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.util.Date; import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.RssNewsRepository; import com.rometools.rome.feed.synd.SyndEntry; import com.rometools.rome.feed.synd.SyndFeed; import com.rometools.rome.io.SyndFeedInput; import com.rometools.rome.io.XmlReader; +import lombok.extern.slf4j.Slf4j; + /** * RSS 피드 URL을 통해 XML 피드를 읽고 파싱하여 {@link SyndEntry} 목록으로 반환하는 Reader 클래스. * Rome 라이브러리를 이용하여 RSS를 파싱하며, 유효하지 않은 URL 또는 파싱 오류에 대해 예외를 처리한다. * + * @modified 2025-05-18 최신 발행일 이후 데이터만 필터링하는 기능 추가 * @since 2025-05-10 */ +@Slf4j @Component public class RssFeedReader { + private final RssNewsRepository rssNewsRepository; + private static final Map lastProcessedDateMap = new ConcurrentHashMap<>(); + + @Autowired + public RssFeedReader(RssNewsRepository rssNewsRepository) { + this.rssNewsRepository = rssNewsRepository; + } + + /** + * 주어진 피드 URL로부터 RSS 피드를 파싱하고, 최신 발행일 이후의 {@link SyndEntry} 리스트를 반환한다. 
+ * + * @param feedUrl RSS 피드의 URL 문자열 + * @param mapperType 매퍼 타입 (언론사 코드) + * @return 파싱 및 필터링된 SyndEntry 목록 + * @modified 2025-05-18 최신 발행일 이후 데이터만 필터링하는 기능 추가 + * @author 함예정 + * @since 2025-05-10 + */ + public List getFeed(String feedUrl, String mapperType) { + URL url = getURL(feedUrl); + URLConnection connection = openConnectionWithTimeout(url); + List entries = parseRssEntries(connection); + + LocalDateTime latestPubDate = getLatestPubDate(mapperType); + + List filteredEntries = entries.stream() + .filter(entry -> isAfterLatestPubDate(entry, latestPubDate)) + .collect(Collectors.toList()); + + return filteredEntries; + } + + /** + * 언론사별 최신 발행일을 저장소에서 조회 (조회 결과는 lastProcessedDateMap에 기록만 하며, 호출마다 저장소를 다시 조회함) + * + * @param mapperType 매퍼 타입 (언론사 코드) + * @return 저장소의 최신 발행일, 없으면 현재 시각 기준 1일 전 + */ + private LocalDateTime getLatestPubDate(String mapperType) { + String guidPrefix = mapperType.toUpperCase(); + + LocalDateTime latestPubDate = rssNewsRepository.findLatestPubDateByGuidPrefix(guidPrefix); + + if (latestPubDate == null) { + latestPubDate = getDefaultPubDate(); + } + + lastProcessedDateMap.put(mapperType, latestPubDate); + return latestPubDate; + } + + private LocalDateTime getDefaultPubDate() { + LocalDateTime latestPubDate = LocalDateTime.now().minusDays(1); + return latestPubDate; + } + + /** + * 항목의 발행일이 최신 발행일보다 이후인지 확인 + * + * @param entry RSS 항목 + * @param latestPubDate 최신 발행일 + * @return 최신 발행일 이후면 true + */ + private boolean isAfterLatestPubDate(SyndEntry entry, LocalDateTime latestPubDate) { + if (entry.getPublishedDate() == null) { + log.debug("발행일 없음 - 항목 제외: {}", entry.getTitle()); + return false; + } + + LocalDateTime pubDate = convertToLocalDateTime(entry.getPublishedDate()); + + boolean isAfter = pubDate.isAfter(latestPubDate); + + return isAfter; + } + + /** + * Date 객체를 LocalDateTime으로 변환 + * + * @param date 변환할 Date 객체 + * @return 변환된 LocalDateTime + */ + private LocalDateTime convertToLocalDateTime(Date date) { + return date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); + } + + /** + * 
문자열 형태의 URL을 {@link URL} 객체로 변환한다. + * + * @param feedUrl 문자열 형태의 URL + * @return URL 객체 + * @throws RuntimeException 유효하지 않은 URL 형식일 경우 + * @author 함예정 + * @since 2025-05-10 + */ + private URL getURL(String feedUrl) { + try { + return new URL(feedUrl); + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + } + + /** + * 지정된 URL에 대해 연결 타임아웃과 읽기 타임아웃을 설정한 후 URLConnection을 반환합니다. + * + * @param url 연결할 URL 객체 + * @return 설정된 타임아웃을 가진 URLConnection 객체 + * @throws ArticleCollectorException 연결 중 IOException이 발생할 경우 FEED_CONNECTION_ERROR 코드의 예외로 래핑하여 던짐 + * @author 함예정 + * @since 2025-05-12 + */ + private URLConnection openConnectionWithTimeout(URL url) { + try { + URLConnection connection = url.openConnection(); + connection.setConnectTimeout(3000); + connection.setReadTimeout(5000); + return connection; + } catch (IOException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_CONNECTION_ERROR, e); + } + } - /** - * 주어진 피드 URL로부터 RSS 피드를 파싱하고, {@link SyndEntry} 리스트를 반환한다. - * - * @param feedUrl RSS 피드의 URL 문자열 - * @return 파싱된 SyndEntry 목록 - * @since 2025-05-10 - * @author 함예정 - */ - public List getFeed(String feedUrl) { - URL url = getURL(feedUrl); - URLConnection connection = openConnectionWithTimeout(url); - return parseRssEntries(connection); - } - - /** - * 문자열 형태의 URL을 {@link URL} 객체로 변환한다. - * - * @param feedUrl 문자열 형태의 URL - * @return URL 객체 - * @throws RuntimeException 유효하지 않은 URL 형식일 경우 - * @since 2025-05-10 - * @author 함예정 - */ - private URL getURL(String feedUrl) { - try { - return new URL(feedUrl); - } catch (MalformedURLException e) { - throw new RuntimeException(e); - } - } - - /** - * 지정된 URL에 대해 연결 타임아웃과 읽기 타임아웃을 설정한 후 URLConnection을 반환합니다. 
- * - * @param url 연결할 URL 객체 - * @return 설정된 타임아웃을 가진 URLConnection 객체 - * @throws RuntimeException 연결 중 IOException이 발생할 경우 런타임 예외로 래핑하여 던짐 - * @author 함예정 - * @since 2025-05-12 - */ - private URLConnection openConnectionWithTimeout(URL url) { - try { - URLConnection connection = url.openConnection(); - connection.setConnectTimeout(3000); - connection.setReadTimeout(5000); - return connection; - } catch (IOException e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_CONNECTION_ERROR, e); - } - } - - /** - * 주어진 URLConnection으로부터 RSS 피드를 읽어 SyndEntry 목록으로 파싱합니다. - * - * @param connection RSS 피드를 제공하는 URLConnection 객체 - * @return 파싱된 SyndEntry 객체 리스트 - * @throws ArticleCollectorException RSS 피드 파싱 중 오류가 발생한 경우 사용자 정의 예외로 래핑하여 던짐 - * @author 함예정 - * @since 2025-05-12 - */ - private List parseRssEntries(URLConnection connection) { - try (XmlReader reader = new XmlReader(connection)) { - SyndFeedInput input = new SyndFeedInput(); - SyndFeed syndFeed = input.build(reader); - return syndFeed.getEntries(); - } catch (Exception e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); - } - } -} + /** + * 주어진 URLConnection으로부터 RSS 피드를 읽어 SyndEntry 목록으로 파싱합니다. 
+ * + * @param connection RSS 피드를 제공하는 URLConnection 객체 + * @return 파싱된 SyndEntry 객체 리스트 + * @throws ArticleCollectorException RSS 피드 파싱 중 오류가 발생한 경우 사용자 정의 예외로 래핑하여 던짐 + * @author 함예정 + * @since 2025-05-12 + */ + private List parseRssEntries(URLConnection connection) { + try (XmlReader reader = new XmlReader(connection)) { + SyndFeedInput input = new SyndFeedInput(); + SyndFeed syndFeed = input.build(reader); + return syndFeed.getEntries(); + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java index 0281f11..a9ec1e7 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java @@ -11,121 +11,114 @@ import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; -import lombok.extern.slf4j.Slf4j; - /** * 활성화된 RSS 소스를 파티션 단위로 분할하여 StepExecutionContext에 전달하는 Partitioner 구현체. * Spring Batch에서 멀티 스레드/병렬 실행을 위해 사용된다. - * + *

* 각 파티션은 sourceList를 포함한 ExecutionContext로 구성된다. * * @since 2025-05-10 */ -@Slf4j @Component public class RssSourcePartitioner implements Partitioner { + /** + * 전체 RSS 소스를 파티셔닝하여 각 파티션별 ExecutionContext를 생성한다. + * 모든 활성화된 RSS 소스(카테고리 포함)를 처리한다. + * + * @param gridSize 실행할 파티션 수 + * @return 파티션 이름과 ExecutionContext의 매핑 정보 + * @modified 2025-05-14 모든 카테고리 처리하도록 수정 + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public Map partition(int gridSize) { + List allSources = RssSource.getEnabledSources(); - /** - * 전체 RSS 소스를 파티셔닝하여 각 파티션별 ExecutionContext를 생성한다. - * - * @param gridSize 실행할 파티션 수 - * @return 파티션 이름과 ExecutionContext의 매핑 정보 - * @since 2025-05-10 - * @author 함예정 - */ - @Override - public Map partition(int gridSize) { - List sources = RssSource.getEnabledSources(); - int chunkSize = calculateChunkSize(sources.size(), gridSize); - return buildPartitions(sources, chunkSize); - } + int chunkSize = calculateChunkSize(allSources.size(), gridSize); + return buildPartitions(allSources, chunkSize); + } - /** - * 총 소스 수와 파티션 수를 기반으로 파티션당 소스 개수를 계산한다. - * - * @param totalSources 전체 RSS 소스 수 - * @param gridSize 파티션 수 - * @return 파티션당 소스 개수 - * @since 2025-05-10 - * @author 함예정 - */ - private int calculateChunkSize(int totalSources, int gridSize) { - int chunkSize = (int)Math.ceil((double)totalSources / gridSize); - log.info("Calculated chunkSize: {}", chunkSize); - return chunkSize; - } + /** + * 총 소스 수와 파티션 수를 기반으로 파티션당 소스 개수를 계산한다. + * + * @param totalSources 전체 RSS 소스 수 + * @param gridSize 파티션 수 + * @return 파티션당 소스 개수 + * @since 2025-05-10 + */ + private int calculateChunkSize(int totalSources, int gridSize) { + int chunkSize = (int) Math.ceil((double) totalSources / gridSize); + return chunkSize; + } - /** - * RSS 소스를 주어진 chunkSize로 나눠 각 파티션별 ExecutionContext를 생성한다. 
- * - * @param sources RSS 소스 리스트 - * @param chunkSize 파티션당 소스 개수 - * @return 파티션 맵 - * @since 2025-05-10 - * @author 함예정 - */ - private Map buildPartitions(List sources, int chunkSize) { - Map partitions = new HashMap<>(); - int totalPartitions = calculateTotalPartitions(sources, chunkSize); + /** + * RSS 소스를 주어진 chunkSize로 나눠 각 파티션별 ExecutionContext를 생성한다. + * + * @param sources RSS 소스 리스트 + * @param chunkSize 파티션당 소스 개수 + * @return 파티션 맵 + * @since 2025-05-10 + */ + private Map buildPartitions(List sources, int chunkSize) { + Map partitions = new HashMap<>(); + int totalPartitions = calculateTotalPartitions(sources, chunkSize); - for (int i = 0; i < totalPartitions; i++) { - int from = i * chunkSize; - int to = calculateChunkEndIndex(sources, chunkSize, from); + for (int i = 0; i < totalPartitions; i++) { + int from = i * chunkSize; + int to = calculateChunkEndIndex(sources, chunkSize, from); - if (from >= to) { - break; - } + if (from >= to) { + break; + } - ExecutionContext context = buildExecutionContext(sources, from, to); - partitions.put("partition" + i, context); - } + ExecutionContext context = buildExecutionContext(sources, from, to); + partitions.put("partition" + i, context); + } - return partitions; - } + return partitions; + } - /** - * 주어진 RSS 소스 리스트를 청크 크기(chunkSize)로 분할할 때 필요한 총 파티션 수를 계산합니다. - * - * @param sources RSS 소스 목록 - * @param chunkSize 하나의 파티션에 포함될 RSS 소스 수 - * @return 전체 파티션 수 - * @author 함예정 - * @since 2025-05-12 - */ - private int calculateTotalPartitions(List sources, int chunkSize) { - return (sources.size() + chunkSize - 1) / chunkSize; - } + /** + * 주어진 RSS 소스 리스트를 청크 크기(chunkSize)로 분할할 때 필요한 총 파티션 수를 계산합니다. 
+ * + * @param sources RSS 소스 목록 + * @param chunkSize 하나의 파티션에 포함될 RSS 소스 수 + * @return 전체 파티션 수 + * @since 2025-05-12 + */ + private int calculateTotalPartitions(List sources, int chunkSize) { + return (sources.size() + chunkSize - 1) / chunkSize; + } - /** - * 주어진 시작 인덱스(from)와 청크 크기(chunkSize)를 기반으로, - * 리스트의 범위를 초과하지 않도록 제한된 끝 인덱스를 계산합니다. - * - * @param sources RSS 소스 리스트 - * @param chunkSize 하나의 파티션에 포함될 RSS 소스 수 - * @param from 시작 인덱스 - * @return 리스트 범위를 초과하지 않는 끝 인덱스 - * @author 함예정 - * @since 2025-05-12 - */ - private int calculateChunkEndIndex(List sources, int chunkSize, int from) { - return Math.min(from + chunkSize, sources.size()); - } + /** + * 주어진 시작 인덱스(from)와 청크 크기(chunkSize)를 기반으로, + * 리스트의 범위를 초과하지 않도록 제한된 끝 인덱스를 계산합니다. + * + * @param sources RSS 소스 리스트 + * @param chunkSize 하나의 파티션에 포함될 RSS 소스 수 + * @param from 시작 인덱스 + * @return 리스트 범위를 초과하지 않는 끝 인덱스 + * @since 2025-05-12 + */ + private int calculateChunkEndIndex(List sources, int chunkSize, int from) { + return Math.min(from + chunkSize, sources.size()); + } - /** - * 지정된 인덱스 범위에 해당하는 RSS 소스 부분 리스트로 ExecutionContext를 생성한다. - * 생성된 context는 Spring Batch 파티션 실행 시 각 Step에 전달된다. - * - * @param sources 전체 RSS 소스 리스트 - * @param from 시작 인덱스 (포함) - * @param to 종료 인덱스 (미포함) - * @return 파티션별 RSS 소스가 포함된 ExecutionContext - * @since 2025-05-10 - * @author 함예정 - */ - private ExecutionContext buildExecutionContext(List sources, int from, int to) { - List subList = new ArrayList<>(sources.subList(from, to)); - ExecutionContext context = new ExecutionContext(); - context.put("sourceList", subList); - return context; - } -} + /** + * 지정된 인덱스 범위에 해당하는 RSS 소스 부분 리스트로 ExecutionContext를 생성한다. + * 생성된 context는 Spring Batch 파티션 실행 시 각 Step에 전달된다. 
+ * + * @param sources 전체 RSS 소스 리스트 + * @param from 시작 인덱스 (포함) + * @param to 종료 인덱스 (미포함) + * @return 파티션별 RSS 소스가 포함된 ExecutionContext + * @since 2025-05-10 + */ + private ExecutionContext buildExecutionContext(List sources, int from, int to) { + List subList = new ArrayList<>(sources.subList(from, to)); + ExecutionContext context = new ExecutionContext(); + context.put("sourceList", subList); + return context; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java index b021c69..ffb9f35 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java @@ -1,16 +1,22 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; import com.rometools.rome.feed.synd.SyndContent; import 
com.rometools.rome.feed.synd.SyndEntry; +import org.springframework.beans.factory.annotation.Autowired; +import java.io.IOException; import java.time.LocalDateTime; import java.time.ZoneId; -import java.util.Date; -import java.util.Optional; +import java.util.*; +import java.util.stream.Collectors; /** * RSS를 ArticleEntity로 변환하는 추상 클래스 @@ -18,9 +24,13 @@ * * @author 양병학 * @since 2025-05-13 최초 작성 + * @modified 2025-05-15 의존성 주입 방식 개선 (템플릿 메서드 패턴 적용) */ public abstract class AbstractRssMapper { + protected abstract ScraperFactory getScraperFactory(); + private static final ObjectMapper objectMapper = new ObjectMapper(); + /** * RSS 피드를 ArticleEntity 엔티티로 변환 * @@ -29,16 +39,59 @@ public abstract class AbstractRssMapper { * @return 변환된 ArticleEntity 엔티티 */ public ArticleEntity mapToRssNews(SyndEntry entry, RssSource source) { - String title = extractTitle(entry); - String link = extractLink(entry); - LocalDateTime pubDate = extractPubDate(entry); - String guid = extractGuid(entry, source); - String description = extractDescription(entry); - String category = extractCategory(entry, source); + ArticleInfo info = extractBasicInfo(entry, source); + + String content = determineContent(info.description, info.link, source); + + return buildArticleEntity( + info.title, + info.link, + info.pubDate, + info.guid, + content, + info.category, + info.imageUrl); + } + + /** + * RSS 항목에서 기본 정보 추출 + */ + private ArticleInfo extractBasicInfo(SyndEntry entry, RssSource source) { + return new ArticleInfo( + extractTitle(entry), + extractLink(entry), + extractPubDate(entry), + extractGuid(entry, source), + extractDescription(entry), + extractCategory(entry, source), + extractImageUrl(entry) + ); + } - return buildArticleEntity(title, link, pubDate, guid, description, category); + /** + * 본문 내용 결정 (RSS 또는 스크래핑) + */ + private String determineContent(String description, String link, RssSource source) { + if (source.hasFullContent()) { + return description; + } + + return 
getContentWithScraping(description, link, source.getMapperType()); } + /** + * 기사 기본 정보를 담는 내부 클래스 + */ + private record ArticleInfo( + String title, + String link, + LocalDateTime pubDate, + String guid, + String description, + String category, + String imageUrl + ) {} + /** * 매퍼의 유형을 식별하는 코드 반환 * 소문자 언론사 코드 형태 (예: "km", "da", "kh") @@ -94,6 +147,70 @@ protected String extractDescription(SyndEntry entry) { return entry.getDescription() != null ? entry.getDescription().getValue() : null; } + /** + * 이미지 URL 추출 메서드 + * media:content 태그에서 이미지 URL 추출 + * + * @param entry RSS 항목 + * @return 이미지 URL + */ + protected String extractImageUrl(SyndEntry entry) { + return entry.getForeignMarkup().stream() + .filter(element -> "content".equals(element.getName()) && + "media".equals(element.getNamespacePrefix())) + .findFirst() + .map(element -> element.getAttributeValue("url")) + .orElse(""); + } + + /** + * 본문 내용을 가져오는 메서드 + * + * @param originalDescription RSS에서 추출한 기본 설명 + * @param link 기사 URL + * @param mapperType 매퍼 타입 + * @return 최종 본문 내용 + */ + private String getContentWithScraping(String originalDescription, String link, String mapperType) { + ContentScraper scraper = findScraper(mapperType); + return scrapeContent(scraper, link, originalDescription); + } + + /** + * 매퍼 타입에 맞는 스크래퍼를 찾음 + * + * @param mapperType 매퍼 타입 + * @return 스크래퍼 객체 + * @throws ArticleCollectorException 스크래퍼를 찾을 수 없는 경우 + */ + private ContentScraper findScraper(String mapperType) { + return getScraperFactory().getScraper(mapperType) + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.MAPPER_NOT_FOUND)); + } + + /** + * 스크래퍼를 사용하여 콘텐츠 스크래핑 수행 + * + * @param scraper 스크래퍼 객체 + * @param link 기사 URL + * @param fallbackContent 스크래핑 실패 시 사용할 대체 콘텐츠 + * @return 스크래핑된 콘텐츠 또는 대체 콘텐츠 + */ + private String scrapeContent(ContentScraper scraper, String link, String fallbackContent) { + try { + String scrapedContent = scraper.scrapeContent(link); + return scrapedContent != null && 
!scrapedContent.isEmpty() + ? scrapedContent + : fallbackContent; + } catch (ArticleCollectorException e) { + throw e; + } catch (IllegalArgumentException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.INVALID_JOB_PARAMETER, e); + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); + } + } + /** * 카테고리 추출 메서드 * @@ -114,19 +231,8 @@ protected String extractCategory(SyndEntry entry, RssSource source) { */ protected abstract String extractGuid(SyndEntry entry, RssSource source); - /** - * ArticleEntity 객체 생성 - * - * @param title 제목 - * @param link 링크 - * @param pubDate 발행일 - * @param guid GUID - * @param description 설명 - * @param category 카테고리 - * @return 생성된 ArticleEntity - */ private ArticleEntity buildArticleEntity(String title, String link, LocalDateTime pubDate, - String guid, String description, String category) { + String guid, String description, String category, String imageUrl) { return ArticleEntity.builder() .title(title) .link(link) @@ -134,7 +240,94 @@ private ArticleEntity buildArticleEntity(String title, String link, LocalDateTim .category(category) .guid(guid) .description(description) - .isSummary(false) + .imageUrl(imageUrl) .build(); } + + /** + * HTML 문자열에서 모든 태그를 제거하고 문단을 추출하는 공통 메서드 + * + * @param html HTML 문자열 + * @return 정제된 문단 리스트 + */ + protected List extractCleanParagraphs(String html) { + if (html == null || html.isEmpty()) { + return new ArrayList<>(); + } + + try { + String withBreaks = html.replaceAll("", "PARAGRAPH_BREAK"); + String noTags = withBreaks.replaceAll("<[^>]*>", ""); + String decoded = noTags.replace(" ", " ") + .replace(" ", " ") + .replace("<", "<") + .replace(">", ">") + .replace("&", "&") + .replace(""", "\"") + .replace("'", "'"); + + decoded = decoded.replaceAll("\\s+", " ").trim(); + String[] paragraphs = decoded.split("PARAGRAPH_BREAK"); + + return Arrays.stream(paragraphs) + .map(String::trim) + .filter(p -> !p.isEmpty()) + 
.collect(Collectors.toList()); + } catch (Exception e) { + List fallback = new ArrayList<>(); + fallback.add(removeAllHtmlTags(html)); + return fallback; + } + } + + /** + * 모든 HTML 태그 제거하는 공통 메서드 + * + * @param html HTML 문자열 + * @return 태그가 제거된 문자열 + */ + protected String removeAllHtmlTags(String html) { + if (html == null || html.isEmpty()) { + return ""; + } + + String noTags = html.replaceAll("<[^>]*>", ""); + String decoded = decodeHtmlEntities(noTags); + + return decoded.replaceAll("\\s+", " ").trim(); + } + + /** + * HTML 엔티티를 디코딩하는 유틸리티 메서드 + * + * @param text HTML 엔티티가 포함된 문자열 + * @return 디코딩된 문자열 + */ + protected String decodeHtmlEntities(String text) { + if (text == null || text.isEmpty()) { + return ""; + } + + return text.replace(" ", " ") + .replace(" ", " ") + .replace("<", "<") + .replace(">", ">") + .replace("&", "&") + .replace(""", "\"") + .replace("'", "'"); + } + + /** + * 문단 리스트를 JSON으로 직렬화하는 공통 메서드 + * + * @param paragraphs 문단 리스트 + * @return JSON 문자열 + */ + protected String serializeParagraphs(List paragraphs) { + try { + return objectMapper.writeValueAsString(paragraphs); + } catch (JsonProcessingException e) { + return String.join("\n\n", paragraphs); + } + } } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java index f34f2bd..d8a0ba5 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java @@ -1,14 +1,20 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; +import 
com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; -import com.rometools.rome.feed.synd.SyndCategory; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; import com.rometools.rome.feed.synd.SyndEntry; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.stream.Collectors; /** @@ -16,11 +22,29 @@ * * @author 양병학 * @since 2025-05-10 최초 작성 - * @modified 2025-05-13 AbstractRssMapper 상속 구조로 변경 및 활성화 + * @modified 2025-05-15 템플릿 메서드 패턴 적용, 의존성 주입 방식 개선 + * @modified 2025-05-17 HTML 태그 제거 및 문단 직렬화 기능 추가 */ @Component public class DongaRssMapper extends AbstractRssMapper { + private final ScraperFactory scraperFactory; + + @Autowired + public DongaRssMapper(ScraperFactory scraperFactory) { + this.scraperFactory = scraperFactory; + } + + /** + * 템플릿 메서드 패턴 + * + * @return 주입받은 ScraperFactory 인스턴스 + */ + @Override + protected ScraperFactory getScraperFactory() { + return this.scraperFactory; + } + /** * 매퍼 타입 반환 * @@ -49,10 +73,11 @@ protected String extractGuid(SyndEntry entry, RssSource source) { * * @param link 기사 링크 * @return 추출된 고유 ID + * @throws ArticleCollectorException 링크가 null이거나 ID를 추출할 수 없는 경우 */ private String extractUniqueIdFromLink(String link) { - if (link == null) { - return String.valueOf(System.currentTimeMillis()); + if (link == null || 
link.trim().isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); } try { @@ -61,14 +86,14 @@ private String extractUniqueIdFromLink(String link) { return parts[parts.length - 2]; } } catch (Exception e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR, e); } - return String.valueOf(System.currentTimeMillis()); + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); } /** - * 카테고리 enum 정보 추출 + * 카테고리 정보 추출 * * @param entry RSS 항목 * @param source RSS 소스 정보 @@ -78,4 +103,39 @@ private String extractUniqueIdFromLink(String link) { protected String extractCategory(SyndEntry entry, RssSource source) { return source.getCategoryName(); } + + /** + * RSS description에서 HTML 태그를 제거하고 문단을 추출하여 직렬화 + * + * @param entry RSS 항목 + * @return 직렬화된 문단 JSON 또는 원본 description + */ + @Override + protected String extractDescription(SyndEntry entry) { + if (entry.getDescription() == null) { + return ""; + } + + String rawDescription = entry.getDescription().getValue(); + if (rawDescription == null || rawDescription.isEmpty()) { + return ""; + } + + return processHtmlContent(rawDescription); + } + + /** + * HTML 콘텐츠 처리하여 정제된 문단 직렬화 + * + * @param htmlContent HTML 콘텐츠 + * @return 직렬화된 문단 JSON 또는 태그가 제거된 텍스트 + */ + private String processHtmlContent(String htmlContent) { + try { + List paragraphs = extractCleanParagraphs(htmlContent); + return serializeParagraphs(paragraphs); + } catch (Exception e) { + return removeAllHtmlTags(htmlContent); + } + } } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java index c90d439..875bb38 100644 --- 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java @@ -1,15 +1,23 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; -import com.rometools.rome.feed.synd.SyndCategory; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; import com.rometools.rome.feed.synd.SyndEntry; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.stream.Collectors; /** @@ -17,11 +25,118 @@ * * @author 양병학 * @since 2025-05-10 최초 작성 - * @modified 2025-05-13 AbstractRssMapper 상속 구조로 변경 및 활성화 + * @modified 2025-05-15 템플릿 메서드 패턴 적용, 의존성 주입 방식 개선 + * @modified 2025-05-17 mapToRssNews 메서드 오버라이드 및 문단 직렬화 기능 추가 */ @Component public class KhanRssMapper extends AbstractRssMapper { + private final 
ScraperFactory scraperFactory; + + @Autowired + public KhanRssMapper(ScraperFactory scraperFactory) { + this.scraperFactory = scraperFactory; + } + + /** + * 템플릿 메서드 패턴 + * + * @return 주입받은 ScraperFactory 인스턴스 + */ + @Override + protected ScraperFactory getScraperFactory() { + return this.scraperFactory; + } + + /** + * RSS 피드를 ArticleEntity 엔티티로 변환 (오버라이드) + * 경향신문 특화 구현 - 본문과 이미지 URL을 효율적으로 스크래핑하고 문단 직렬화 + * + * @param entry 변환할 SyndEntry(Rss 데이터) 객체 + * @param source RSS 소스 정보 + * @return 변환된 ArticleEntity 엔티티 + */ + @Override + public ArticleEntity mapToRssNews(SyndEntry entry, RssSource source) { + String title = extractTitle(entry); + String link = extractLink(entry); + LocalDateTime pubDate = extractPubDate(entry); + String guid = extractGuid(entry, source); + String description = extractDescription(entry); + String category = extractCategory(entry, source); + String imageUrl = super.extractImageUrl(entry); + + ContentResult contentResult; + if (source.hasFullContent()) { + contentResult = new ContentResult(description, imageUrl); + } else { + contentResult = scrapeContentAndImage(link, description, imageUrl); + } + + return ArticleEntity.builder() + .title(title) + .link(link) + .pubDate(pubDate) + .category(category) + .guid(guid) + .description(contentResult.getContent()) + .imageUrl(contentResult.getImageUrl()) + .build(); + } + + /** + * 본문과 이미지 URL을 스크래핑하고 처리하는 메서드 + * + * @param link 기사 URL + * @param fallbackDescription 스크래핑 실패 시 사용할 설명 + * @param fallbackImageUrl 스크래핑 실패 시 사용할 이미지 URL + * @return 처리된 콘텐츠와 이미지 URL이 포함된 결과 객체 + */ + private ContentResult scrapeContentAndImage(String link, String fallbackDescription, String fallbackImageUrl) { + try { + ContentScraper scraper = getScraperFactory().getScraper(getMapperType()) + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.MAPPER_NOT_FOUND)); + + String content = fallbackDescription; + String scrapedContent = scraper.scrapeContent(link); + if (scrapedContent != null && 
!scrapedContent.isEmpty()) { + List paragraphs = Arrays.asList(scrapedContent.split("\n\n")); + content = serializeParagraphs(paragraphs); + } + + String imageUrl = fallbackImageUrl; + if (imageUrl == null || imageUrl.isEmpty()) { + imageUrl = scraper.scrapeImageUrl(link); + } + + return new ContentResult(content, imageUrl); + } catch (Exception e) { + System.err.println("경향신문 스크래핑 실패: " + e.getMessage()); + return new ContentResult(fallbackDescription, fallbackImageUrl); + } + } + + /** + * 콘텐츠 결과를 담는 내부 클래스 + */ + private static class ContentResult { + private final String content; + private final String imageUrl; + + public ContentResult(String content, String imageUrl) { + this.content = content; + this.imageUrl = imageUrl; + } + + public String getContent() { + return content; + } + + public String getImageUrl() { + return imageUrl; + } + } + /** * 매퍼 타입 반환 * @@ -50,28 +165,73 @@ protected String extractGuid(SyndEntry entry, RssSource source) { * * @param link 기사 링크 * @return 추출된 고유 ID + * @throws ArticleCollectorException 링크가 null이거나 ID를 추출할 수 없는 경우 */ private String extractUniqueIdFromLink(String link) { - if (link == null) { - return String.valueOf(System.currentTimeMillis()); - } + validateLink(link); try { - String[] parts = link.split("/"); - for (int i = 0; i < parts.length; i++) { - if ("article".equals(parts[i]) && i + 1 < parts.length) { - return parts[i + 1]; + String[] pathParts = splitLinkPath(link); + return findArticleIdInPath(pathParts); + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR, e); + } + } + + /** + * 링크 유효성 검사 + * + * @param link 검사할 링크 + * @throws ArticleCollectorException 링크가 null이거나 비어있는 경우 + */ + private void validateLink(String link) { + if (link == null || link.trim().isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); + } + } + + /** + * 링크를 경로 부분으로 분리 + * + * @param link 분리할 링크 + * @return 경로 부분 배열 + */ + private String[] 
splitLinkPath(String link) { + return link.split("/"); + } + + /** + * 경로 부분에서 기사 ID 찾기 + * + * @param pathParts 경로 부분 배열 + * @return 기사 ID + * @throws ArticleCollectorException 기사 ID를 찾을 수 없는 경우 + */ + private String findArticleIdInPath(String[] pathParts) { + for (int i = 0; i < pathParts.length; i++) { + if ("article".equals(pathParts[i]) && i + 1 < pathParts.length) { + String id = pathParts[i + 1]; + if (isValidArticleId(id)) { + return id; } } - } catch (Exception e) { + } + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); } - return String.valueOf(System.currentTimeMillis()); + /** + * 기사 ID 유효성 검사 + * + * @param id 검사할 ID + * @return 유효성 여부 + */ + private boolean isValidArticleId(String id) { + return id != null && !id.trim().isEmpty(); } /** - * 발행일 추출, 경향신문은 dc:date 태그도 확인 + * 발행일 추출, 경향신문은 dc:date 태그 확인 * * @param entry RSS 항목 * @return 발행일 LocalDateTime @@ -86,7 +246,7 @@ protected LocalDateTime extractPubDate(SyndEntry entry) { } /** - * Dublin Core date 태그에서 발행일 추출 + * date 태그에서 발행일 추출 * * @param entry RSS 항목 * @return 추출된 발행일, 없으면 현재 시간 @@ -115,7 +275,7 @@ private LocalDateTime parseDateTime(String dateString) { } /** - * 카테고리 enum 정보 추출 + * 카테고리 정보 추출 * * @param entry RSS 항목 * @param source RSS 소스 정보 diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java index 6f56310..bcd8f8b 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java @@ -1,25 +1,56 @@ package 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; import com.rometools.rome.feed.synd.SyndEntry; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; /** * 국민일보 RSS 매퍼 구현체 * * @author 양병학 * @since 2025-05-10 최초 작성 - * @modified 2025-05-13 AbstractRssMapper 상속 구조로 변경 + * @modified 2025-05-15 템플릿 메서드 패턴 적용, 의존성 주입 방식 개선 + * @modified 2025-05-17 HTML 태그 제거 및 문단 직렬화 기능 추가 */ @Component public class KmibRssMapper extends AbstractRssMapper { private static final Pattern ARCID_PATTERN = Pattern.compile("arcid=([0-9]+)"); + private static final Pattern IMG_SRC_PATTERN = Pattern.compile(""" + paragraphs = extractCleanParagraphs(rawDescription); + return serializeParagraphs(paragraphs); + } catch (Exception e) { + return removeAllHtmlTags(rawDescription); } - return link; } } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java new file mode 100644 index 0000000..6ce2fb9 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java @@ -0,0 +1,74 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +import java.io.IOException; +import java.util.List; + +/** + * 뉴스 본문 스크랩 interface + * 스크래핑 로직이 신문사 마다 다름 + * + * @author 양병학 + * @since 2025-05-13 최초 작성 + */ +public interface ContentScraper { + + /** + * 뉴스 URL에서 본문 내용을 문단 단위로 스크래핑 + * + * @param url 뉴스 URL + * @return 문단 단위로 나눈 본문 리스트 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 + */ + List scrapeParagraphs(String url) throws ArticleCollectorException; + + /** + * 뉴스 URL에서 본문 내용을 텍스트로 스크래핑 + * + * @param url 뉴스 URL + * @return 스크래핑된 본문 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 + */ + String scrapeContent(String url) throws ArticleCollectorException; + + /** + * 뉴스 URL에서 이미지 URL을 스크래핑 + * + * @param url 뉴스 URL + * @return 스크래핑된 이미지 URL + */ + String scrapeImageUrl(String url); + + /** + * 스크래퍼가 지원하는 Mapper type 반환 + * + * @return Mapper Type 영문 2자 (예: "km", "da") + */ + String getSupportedMapperType(); + + + /** + * URL에 연결하여 Document 객체 반환 (기본 구현) + * + * @param url 연결할 URL + * @return 파싱된 JSoup Document + * @throws ArticleCollectorException 연결 오류 발생 시 FEED_PARSING_ERROR 예외 발생 + */ + default Document connectToUrl(String url) { + try { + return Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") + 
.timeout(5000) + .ignoreContentType(true) + .maxBodySize(1024 * 1024) + .followRedirects(true) + .get(); + } catch (IOException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java new file mode 100644 index 0000000..57a2985 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java @@ -0,0 +1,46 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; + +/** + * 뉴스 스크래퍼 사용하는 factory Class + * + * @author 양병학 + * @since 2025-05-13 최초 작성 + */ +@Component +public class ScraperFactory { + + private final Map scrapers = new HashMap<>(); + + /** + * ContentScraper 구현체 등록 + * + * @param availableScrapers ContentScraper 목록 + */ + @Autowired + public ScraperFactory(List availableScrapers) { + for (ContentScraper scraper : availableScrapers) { + String mapperType = scraper.getSupportedMapperType(); + scrapers.put(mapperType, scraper); + } + } + + /** + * Mapper Type에 맞는 스크래퍼 반환 + * + * @param mapperType 매퍼 타입 (예: "km", "da") + * @return 해당 타입의 스크래퍼 or null일시 Optional로 빈 값 반환 + */ + public Optional getScraper(String mapperType) { + return Optional.ofNullable(scrapers.get(mapperType)); + } +} \ No newline at end of file diff 
--git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java new file mode 100644 index 0000000..7885ae3 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java @@ -0,0 +1,131 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.implement; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.util.HtmlScraperUtils; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.springframework.stereotype.Component; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * 동아일보 기사 본문 스크래퍼 구현체 + * + * @author 양병학 + * @since 2025-05-13 최초 작성 + */ +@Component +public class DongaContentScraper implements ContentScraper { + + /** + * 동아일보 기사 URL에서 본문 내용, 문단 단위로 스크래핑 + * + * @param url 기사 URL + * @return 문단 단위로 문단 텍스트 + */ + @Override + public List scrapeParagraphs(String url) { + Document document = connectToUrl(url); + return extractDongaContent(document); + } + + /** + * 동아일보 본문 추출 (section.news_view에서 h2, figure 제외) + * + * @param document JSoup Document + * @return 문단 리스트 + */ + private List extractDongaContent(Document document) { + Element newsView = HtmlScraperUtils.findElement(document, "section.news_view"); + if (null == 
newsView) { + return new ArrayList<>(); + } + + Element processedView = HtmlScraperUtils.removeTags(newsView, "h2", "figure", "img"); + + String fullText = processedView.text(); + List paragraphs = extractParagraphsByQuotes(fullText); + + return paragraphs; + } + + /** + * 큰따옴표 기준으로 문단 추출 + * + * @param text 전체 텍스트 + * @return 문단 리스트 + */ + private List extractParagraphsByQuotes(String text) { + List paragraphs = new ArrayList<>(); + + String[] parts = text.split("\""); + + for (int i = 1; i < parts.length; i += 2) { + String paragraph = parts[i].trim(); + if (!paragraph.isEmpty()) { + paragraphs.add(paragraph); + } + } + + if (paragraphs.isEmpty() && !text.trim().isEmpty()) { + paragraphs.add(text.trim()); + } + + return paragraphs; + } + + /** + * 동아일보 기사 URL에서 본문 내용을 텍스트로 스크래핑 + * + * @param url 기사 URL + * @return 스크래핑된 본문 + */ + @Override + public String scrapeContent(String url) { + List paragraphs = scrapeParagraphs(url); + return String.join("\n\n", paragraphs); + } + + /** + * 동아일보 기사 URL에서 이미지 URL을 스크래핑 + * + * @param url 기사 URL + * @return 스크래핑된 이미지 URL + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 FEED_PARSING_ERROR 예외 발생 + */ + @Override + public String scrapeImageUrl(String url) { + try { + Document document = Jsoup.connect(url).get(); + return extractImageUrlFromDocument(document); + } catch (IOException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); + } + } + + /** + * Document에서 이미지 URL 추출 + * + * @param document 파싱된 JSoup Document + * @return 추출된 이미지 URL + */ + private String extractImageUrlFromDocument(Document document) { + return HtmlScraperUtils.extractImageUrl(document, "section.news_view figure img"); + } + + /** + * 지원하는 매퍼 타입 반환 + * + * @return 동아일보 매퍼 타입 (da) + */ + @Override + public String getSupportedMapperType() { + return "da"; + } +} \ No newline at end of file diff --git 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java new file mode 100644 index 0000000..48c1ac9 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java @@ -0,0 +1,203 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.implement; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.util.HtmlScraperUtils; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.List; + +/** + * 경향신문 기사 본문 스크래퍼 구현체 + * + * @author 양병학 + * @since 2025-05-13 최초 작성 + */ +@Component +public class KhanContentScraper implements ContentScraper { + + /** + * 경향신문 기사 URL에서 본문 내용을 문단 단위로 스크래핑 + * + * @param url 기사 URL + * @return 문단 단위로 나눈 본문 리스트 + */ + @Override + public List scrapeParagraphs(String url) { + try { + Document document = connectToUrl(url); + List paragraphs = extractKhanContent(document); + return paragraphs; + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR, e); + } + } + + /** + * 경향신문 본문 추출 (article.art_body에서 h3, div.art_photo 제외) + * + * @param document JSoup Document + * @return 문단 리스트 + */ + private List 
extractKhanContent(Document document) { + Element artBody = HtmlScraperUtils.findElement(document, "article.art_body"); + + if (artBody == null) { + artBody = HtmlScraperUtils.findElement(document, "div.art_body"); + } + + if (artBody == null) { + artBody = HtmlScraperUtils.findElement(document, "div.article_view"); + } + + if (artBody == null) { + artBody = HtmlScraperUtils.findElement(document, "div.article-body"); + } + + if (artBody == null) { + return new ArrayList<>(); + } + + return extractKhanContentFromElement(artBody); + } + + /** + * 경향신문 본문 요소에서 콘텐츠 추출 + * + * @param artBody 기사 본문 요소 + * @return 문단 리스트 + */ + private List extractKhanContentFromElement(Element artBody) { + Element processedBody = HtmlScraperUtils.removeTags(artBody, "h3", "div.art_photo", "img"); + + processedBody.select("*").forEach(el -> { + el.removeAttr("align"); + el.removeAttr("vspace"); + el.removeAttr("hspace"); + el.removeAttr("style"); + el.removeAttr("width"); + el.removeAttr("height"); + }); + + Elements paragraphs = processedBody.select("p"); + + if (paragraphs.isEmpty()) { + paragraphs = processedBody.select("div.article_paragraph"); + } + + if (paragraphs.isEmpty()) { + paragraphs = processedBody.select("span.article_text"); + } + + if (paragraphs.isEmpty()) { + List fallback = new ArrayList<>(); + String fullText = processedBody.text().trim(); + if (!fullText.isEmpty()) { + fallback.add(fullText); + } + return fallback; + } + + List result = paragraphs.stream() + .map(Element::text) + .filter(text -> !text.trim().isEmpty()) + .toList(); + + return result; + } + + /** + * 경향신문 기사 URL에서 본문 내용을 텍스트로 스크래핑 + * + * @param url 기사 URL + * @return 스크래핑된 본문 + */ + @Override + public String scrapeContent(String url) { + List paragraphs = scrapeParagraphs(url); + return String.join("\n\n", paragraphs); + } + + /** + * 경향신문 기사 URL에서 이미지 URL을 스크래핑 + * + * @param url 기사 URL + * @return 스크래핑된 이미지 URL + */ + @Override + public String scrapeImageUrl(String url) { + try { + Document 
document = connectToUrl(url); + String imageUrl = extractImageUrlFromDocument(document); + return imageUrl; + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR, e); + } + } + + /** + * Document에서 이미지 URL 추출 + * + * @param document 파싱된 JSoup Document + * @return 추출된 이미지 URL + */ + private String extractImageUrlFromDocument(Document document) { + Element metaImg = document.selectFirst("meta[property=og:image]"); + if (metaImg != null && !metaImg.attr("content").isEmpty()) { + return metaImg.attr("content"); + } + + Element mainImg = document.selectFirst("picture img"); + if (mainImg != null && !mainImg.attr("src").isEmpty()) { + return mainImg.attr("abs:src"); + } + + Element source = document.selectFirst("picture source"); + if (source != null && !source.attr("srcset").isEmpty()) { + String srcset = source.attr("srcset"); + String[] sources = srcset.split(","); + if (sources.length > 0) { + String firstSource = sources[0].trim().split("\\s+")[0]; + return source.absUrl("srcset").isEmpty() ? 
firstSource : source.absUrl("srcset"); + } + } + + Element contentImg = document.selectFirst("article.art_body img"); + if (contentImg != null && !contentImg.attr("src").isEmpty()) { + return contentImg.attr("abs:src"); + } + + Element imgContainer = document.selectFirst("div.art_photo img"); + if (imgContainer != null && !imgContainer.attr("src").isEmpty()) { + return imgContainer.attr("abs:src"); + } + + Element figureImg = document.selectFirst("figure img"); + if (figureImg != null && !figureImg.attr("src").isEmpty()) { + return figureImg.attr("abs:src"); + } + + Element anyImg = document.selectFirst("img"); + if (anyImg != null && !anyImg.attr("src").isEmpty()) { + return anyImg.attr("abs:src"); + } + + return ""; + } + + /** + * 지원하는 매퍼 타입 반환 + * + * @return 경향신문 매퍼 타입 (kh) + */ + @Override + public String getSupportedMapperType() { + return "kh"; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/util/HtmlScraperUtils.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/util/HtmlScraperUtils.java new file mode 100644 index 0000000..009396d --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/util/HtmlScraperUtils.java @@ -0,0 +1,203 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.util; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** + * HTML 태그별 스크래핑 유틸리티 클래스 + * + * @author 양병학 + * @since 2025-05-13 최초 작성 + */ +public class HtmlScraperUtils { + + /** + * 지정된 CSS 선택자에 해당하는 요소 찾기 + * + * @param document JSoup Document + * @param selector CSS 선택자 + * @return 찾은 요소, 없으면 null + */ + public static Element 
findElement(Document document, String selector) { + return null != document ? document.selectFirst(selector) : null; + } + + /** + * 지정된 CSS 선택자에 해당하는 모든 요소 찾기 + * + * @param document JSoup Document + * @param selector CSS 선택자 + * @return 찾은 요소들의 목록 + */ + public static Elements findElements(Document document, String selector) { + return null != document ? document.select(selector) : new Elements(); + } + + /** + * 특정 요소에서 지정된 태그들 제거 + * + * @param element 처리할 요소 + * @param tagsToRemove 제거할 태그 목록 (예: "h2", "figure") + * @return 태그가 제거된 요소 (원본은 변경되지 않음) + */ + public static Element removeTags(Element element, String... tagsToRemove) { + return null == element ? null : doRemoveTags(element, tagsToRemove); + } + + private static Element doRemoveTags(Element element, String... tagsToRemove) { + Element clone = element.clone(); + + for (String tag : tagsToRemove) { + clone.select(tag).remove(); + } + + return clone; + } + + /** + * 요소에서 텍스트 추출 (HTML 태그 제거) + * + * @param element 추출할 요소 + * @return 추출된 텍스트, 요소가 null이면 빈 문자열 + */ + public static String extractText(Element element) { + return null != element ? element.text() : ""; + } + + /** + * 여러 요소에서 텍스트 추출하여 결합 + * + * @param elements 처리할 요소들 + * @param separator 텍스트 사이에 넣을 구분자 (예: "\n\n") + * @return 결합된 텍스트 + */ + public static String extractCombinedText(Elements elements, String separator) { + return null != elements && !elements.isEmpty() + ? String.join(separator, elements.stream().map(Element::text).collect(Collectors.toList())) + : ""; + } + + /** + * p 태그의 내용을 개별 문단으로 추출 + * + * @param container p 태그를 포함하는 요소 + * @return 각 p 태그의 내용을 담은 문단 리스트 + */ + public static List extractParagraphs(Element container) { + return null == container ? 
new ArrayList<>() : doParagraphExtraction(container); + } + + /** + * p 태그 추출 실제 로직 + * + * @param container p 태그를 포함하는 요소 + * @return 추출된 문단 리스트 + */ + private static List doParagraphExtraction(Element container) { + Elements paragraphs = container.select("p"); + return paragraphs.stream() + .map(Element::text) + .filter(text -> !text.trim().isEmpty()) + .collect(Collectors.toList()); + } + + /** + * p 태그의 텍스트를 결합하여 추출 + * + * @param container p 태그를 포함하는 요소 + * @param separator 텍스트 사이에 넣을 구분자 (기본값: "\n\n") + * @return 결합된.텍스트 + */ + public static String extractParagraphText(Element container, String separator) { + return null != container + ? extractCombinedText(container.select("p"), separator) + : ""; + } + + /** + * p 태그의 텍스트를 줄바꿈으로 결합하여 추출 (기본 구분자: "\n\n") + * + * @param container p 태그를 포함하는 요소 + * @return 결합된 텍스트 + */ + public static String extractParagraphText(Element container) { + return extractParagraphText(container, "\n\n"); + } + + /** + * 이미지 URL 추출 + * + * @param document JSoup Document + * @param imgSelector 이미지 선택자 + * @return 이미지 URL, 없으면 빈 문자열 + */ + public static String extractImageUrl(Document document, String imgSelector) { + Element img = findElement(document, imgSelector); + return null != img ? img.absUrl("src") : ""; + } + + /** + * HTML 요소 내용 처리 - 공통 메서드 + * + * @param document JSoup Document + * @param selector 요소 선택자 + * @param excludeTags 제외할 태그 목록 + * @return 처리된 텍스트 + */ + public static String processElement(Document document, String selector, String... excludeTags) { + Element element = findElement(document, selector); + return null != element + ? extractText(removeTags(element, excludeTags)) + : ""; + } + + /** + * section 태그 내용 처리 + * + * @param document JSoup Document + * @param sectionSelector section 태그 선택자 + * @param excludeTags 제외할 태그 목록 + * @return 처리된 텍스트 + */ + public static String processSection(Document document, String sectionSelector, String... 
excludeTags) { + Element section = findElement(document, sectionSelector); + return null != section + ? extractText(removeTags(section, excludeTags)) + : ""; + } + + /** + * article 태그 내용 처리 + * + * @param document JSoup Document + * @param articleSelector article 태그 선택자 + * @param excludeTags 제외할 태그 목록 + * @return 처리된 텍스트 + */ + public static String processArticle(Document document, String articleSelector, String... excludeTags) { + Element article = findElement(document, articleSelector); + return null != article + ? extractText(removeTags(article, excludeTags)) + : ""; + } + + /** + * div 태그 내용 처리 + * + * @param document JSoup Document + * @param divSelector div 태그 선택자 + * @param excludeTags 제외할 태그 목록 + * @return 처리된 텍스트 + */ + public static String processDiv(Document document, String divSelector, String... excludeTags) { + Element div = findElement(document, divSelector); + return null != div + ? extractText(removeTags(div, excludeTags)) + : ""; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java index 9d8e612..a8dbfaa 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java @@ -51,8 +51,13 @@ public class ArticleEntity { @Column(columnDefinition = "TEXT") private String description; - @Column(name = "is_summary") - private boolean isSummary; + @Setter + @Column(name = "summary", columnDefinition = "TEXT") + private String summary; + + @Setter + @Column(name = "image_url") + private String imageUrl; @Column(name = "created_at") private LocalDateTime createdAt; @@ -66,4 +71,7 @@ public String getDescription(){ return description != null ? 
description : ""; } + public String getSummary() { + return summary != null ? summary : ""; + } } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java index a4b327a..44502f3 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java @@ -3,10 +3,23 @@ import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; import org.springframework.stereotype.Repository; +import java.time.LocalDateTime; + @Repository public interface RssNewsRepository extends JpaRepository { boolean existsByLink(String link); + + /** + * 특정 언론사의 가장 최신 기사 발행일 조회 + * + * @param guidPrefix 언론사 GUID 접두어 (예: "KM", "DA", "KH") + * @return 가장 최신 발행일 + */ + @Query("SELECT MAX(a.pubDate) FROM ArticleEntity a WHERE a.guid LIKE CONCAT(:guidPrefix, '%')") + LocalDateTime findLatestPubDateByGuidPrefix(@Param("guidPrefix") String guidPrefix); } \ No newline at end of file From 10d905105e6f4f975f5849cb9693edd96faf93e8 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Thu, 15 May 2025 18:52:22 +0900 Subject: [PATCH 21/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#59=20=EB=89=B4?= =?UTF-8?q?=EC=8A=A4=20=EC=A0=95=EB=B3=B4=20=EC=A0=9C=EA=B3=B5=20=ED=8F=AC?= =?UTF-8?q?=ED=8A=B8=20=EC=96=B4=EB=8C=91=ED=84=B0=20=EA=B5=AC=ED=98=84=20?= =?UTF-8?q?(#64)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ♻️ Refactor: RssNewsRepository 이름 변경 -> 
NewsInfoJpaRepository * ✨ Feature: ArticleEntityMapper 추가 * ✨ Feature: 뉴스 정보 제공 어댑터 구현 * 💄Style: 주석 위치 이동 * ✨ Feature: 데이터 가져오는 샘플 코드 추가 * Update src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java ♻️ Refactor: 뉴스카운트 매직넘버 분리 --------- Co-authored-by: Atriel <118334518+Atriel1999@users.noreply.github.com> --- .../batch/index/domain/model/NewsInfo.java | 2 + .../collector/writer/ArticleWriter.java | 8 +-- .../jpa/adapter/NewsInfoProviderAdapter.java | 49 +++++++++++++++++++ .../jpa/mapper/ArticleEntityMapper.java | 17 +++++++ ...sitory.java => NewsInfoJpaRepository.java} | 2 +- .../NewsInfoProviderPortSample.java | 31 ++++++++++++ 6 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java rename src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/{RssNewsRepository.java => NewsInfoJpaRepository.java} (91%) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java index 9091523..81550e9 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java @@ -2,11 +2,13 @@ import java.time.LocalDateTime; +import lombok.Builder; import lombok.Getter; import lombok.RequiredArgsConstructor; @RequiredArgsConstructor @Getter +@Builder public class NewsInfo{ private final String newsId; private final String 
title; diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java index 79bb31a..e66ccc3 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java @@ -9,7 +9,7 @@ import org.springframework.stereotype.Component; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; -import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.RssNewsRepository; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -29,7 +29,7 @@ @RequiredArgsConstructor public class ArticleWriter implements ItemWriter> { - private final RssNewsRepository rssNewsRepository; + private final NewsInfoJpaRepository newsInfoJpaRepository; /** * 기사 리스트를 저장하며, 중복된 기사는 건너뛴다. 
@@ -44,7 +44,7 @@ public void write(Chunk> chunk) { AtomicInteger savedCount = new AtomicInteger(); chunk.getItems().stream() .flatMap(List::stream) - .filter(item -> !rssNewsRepository.existsByLink(item.getLink())) + .filter(item -> !newsInfoJpaRepository.existsByLink(item.getLink())) .forEach(item -> {saveItem(item, savedCount);}); log.info("새로 저장된 뉴스 개수: {}", savedCount.get()); } @@ -59,7 +59,7 @@ public void write(Chunk> chunk) { */ private void saveItem(ArticleEntity item, AtomicInteger savedCount) { try { - rssNewsRepository.save(item); + newsInfoJpaRepository.save(item); savedCount.incrementAndGet(); } catch (DataIntegrityViolationException e) { log.debug("중복 항목 감지: {}", item.getLink()); diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java new file mode 100644 index 0000000..57fe50b --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java @@ -0,0 +1,49 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.adapter; + +import java.util.List; + +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; +import org.springframework.data.domain.Sort; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.mapper.ArticleEntityMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; + +import lombok.RequiredArgsConstructor; + +/** + * TODO: 이벤트 기반으로 색인 안된 뉴스만 제공하도록 수정 필요 + * 
NewsInfoProviderPort 인터페이스의 구현체로, + * JPA 리포지토리를 통해 뉴스 정보를 조회하는 어댑터 클래스입니다. + * 현재는 최근 100개 뉴스를 반환합니다. + * @since 2025-05-14 + */ +@Component +@RequiredArgsConstructor +public class NewsInfoProviderAdapter implements NewsInfoProviderPort { + private final NewsInfoJpaRepository newsInfoJpaRepository; + + /** + * 뉴스 정보를 최신순으로 최대 100건까지 조회하여 도메인 객체 리스트로 반환합니다. + * + * @return 뉴스 도메인 객체 리스트 + * @author 함예정 + * @since 2025-05-14 + */ + @Override + public List fetchAll() { + private static final int MAX_NEWS_COUNT = 100; + + Pageable pageable = PageRequest.of(0, MAX_NEWS_COUNT) + .withSort(Sort.by("pubDate").descending()); + + return newsInfoJpaRepository.findAll(pageable) + .getContent() + .stream() + .map(ArticleEntityMapper::toDomainFromEntity) + .toList(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java new file mode 100644 index 0000000..6cbb9bc --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java @@ -0,0 +1,17 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.mapper; + +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +public class ArticleEntityMapper { + public static NewsInfo toDomainFromEntity(ArticleEntity articleEntity) { + return NewsInfo.builder() + .newsId(articleEntity.getGuid()) + .title(articleEntity.getTitle()) + .content(articleEntity.getDescription()) + .publishedAt(articleEntity.getPubDate()) + .imageUrl(null) // TODO: 나중에 추가 예정 + .category(articleEntity.getCategory()) + .build(); + } +} diff --git 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java similarity index 91% rename from src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java rename to src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java index 44502f3..00432da 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/RssNewsRepository.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java @@ -10,7 +10,7 @@ import java.time.LocalDateTime; @Repository -public interface RssNewsRepository extends JpaRepository { +public interface NewsInfoJpaRepository extends JpaRepository { boolean existsByLink(String link); diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java new file mode 100644 index 0000000..23270cf --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java @@ -0,0 +1,31 @@ +package com.likelion.backendplus4.talkpick.batch.sample.index.providerPort; + +import static com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse.*; + +import java.util.List; + +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse; 
+import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; + +import lombok.RequiredArgsConstructor; + +@RestController +@RequiredArgsConstructor +@RequestMapping("/sample/news/info") +public class NewsInfoProviderPortSample { + private final NewsInfoProviderPort newsInfoProviderPort; + + /** + * 실제 사용시에는 Response 객체로 변환 필요 + */ + @GetMapping + public ResponseEntity>> fetchAll() { + return success(newsInfoProviderPort.fetchAll()); + } +} From 46c4508d636211dab51830cef25fef73b94eecff Mon Sep 17 00:00:00 2001 From: JUNG ANSIK Date: Thu, 15 May 2025 18:52:50 +0900 Subject: [PATCH 22/36] =?UTF-8?q?=E2=9C=A8=C2=A0=20Feature:=20#58-?= =?UTF-8?q?=EB=89=B4=EC=8A=A4-=EB=8D=B0=EC=9D=B4=ED=84=B0-Elasticsearch-?= =?UTF-8?q?=EC=A0=80=EC=9E=A5=20(#62)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨  Feature/#58-뉴스-데이터-Elasticsearch-저장 * ♻️ Refactor: Error핸들 로직 추가 --- .../elasticsearch/ElasticsearchConfig.java | 7 + .../application/port/in/NewsIndexUseCase.java | 5 + .../port/out/NewsInfoIndexRepositoryPort.java | 22 +++ .../port/out/NewsInfoProviderPort.java | 12 ++ .../application/service/NewsIndexService.java | 43 +++++ .../batch/index/domain/model/NewsInfo.java | 5 + .../adapter/ElasticsearchNewsInfoAdapter.java | 168 ++++++++++++++++++ .../adapter/document/NewsInfoDocument.java | 33 ++++ .../mapper/NewsInfoDocumentMapper.java | 34 ++++ .../controller/NewsIndexController.java | 41 +++++ .../index/NewsIndexServiceTestImpl.java | 53 ++++++ src/main/resources/application.yml | 6 + 12 files changed, 429 insertions(+) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/elasticsearch/ElasticsearchConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/in/NewsIndexUseCase.java create mode 100644 
src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoIndexRepositoryPort.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/service/NewsIndexService.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/index/presentation/controller/NewsIndexController.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/elasticsearch/ElasticsearchConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/elasticsearch/ElasticsearchConfig.java new file mode 100644 index 0000000..8688c24 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/elasticsearch/ElasticsearchConfig.java @@ -0,0 +1,7 @@ +package com.likelion.backendplus4.talkpick.batch.common.configuration.elasticsearch; + +import org.springframework.context.annotation.Configuration; + +@Configuration +public class ElasticsearchConfig { +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/in/NewsIndexUseCase.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/in/NewsIndexUseCase.java new file mode 100644 index 0000000..1c142b4 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/in/NewsIndexUseCase.java @@ -0,0 +1,5 @@ +package 
com.likelion.backendplus4.talkpick.batch.index.application.port.in; + +public interface NewsIndexUseCase { + int indexAllNewsInfo(); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoIndexRepositoryPort.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoIndexRepositoryPort.java new file mode 100644 index 0000000..110217c --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoIndexRepositoryPort.java @@ -0,0 +1,22 @@ +package com.likelion.backendplus4.talkpick.batch.index.application.port.out; + +import java.util.List; + +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; + +/** + * 뉴스 정보를 색인 저장소에 저장하는 포트 인터페이스 + * + * @since 2025-05-15 + */ +public interface NewsInfoIndexRepositoryPort { + /** + * 뉴스 정보 리스트를 색인 저장소에 저장한다. + * + * @param newsList 저장할 뉴스 정보 리스트 + * @return 저장된 뉴스 정보 건수 + * @author 정안식 + * @since 2025-05-15 + */ + int saveAll(List newsList); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java index 97fc87e..ff64069 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java @@ -4,6 +4,18 @@ import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +/** + * 외부 또는 내부에서 뉴스 정보를 조회하는 포트 인터페이스 + * + * @since 2025-05-15 + */ public interface NewsInfoProviderPort { + /** + * 저장된 모든 뉴스 정보를 조회한다. 
+ * + * @return 조회된 뉴스 정보 리스트 + * @author 정안식 + * @since 2025-05-15 + */ List fetchAll(); } diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/service/NewsIndexService.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/service/NewsIndexService.java new file mode 100644 index 0000000..7c243a2 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/service/NewsIndexService.java @@ -0,0 +1,43 @@ +package com.likelion.backendplus4.talkpick.batch.index.application.service; + +import java.util.List; + +import org.elasticsearch.index.IndexService; +import org.springframework.stereotype.Service; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; +import com.likelion.backendplus4.talkpick.batch.index.application.port.in.NewsIndexUseCase; +import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoIndexRepositoryPort; +import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * 뉴스 정보를 조회하고 색인 저장소에 전달하는 비즈니스 로직 서비스 + * + * @since 2025-05-15 + */ +@Slf4j +@RequiredArgsConstructor +@Service +public class NewsIndexService implements NewsIndexUseCase { + private final NewsInfoProviderPort newsInfoProviderPort; + private final NewsInfoIndexRepositoryPort newsInfoIndexRepositoryPort; + + /** + * 모든 뉴스 정보를 가져와 색인 저장소에 저장하고 저장된 건수를 반환한다. 
+ * + * @return 색인된 뉴스 정보 건수 + * @author 정안식 + * @since 2025-05-15 + */ + @EntryExitLog() + @Override + public int indexAllNewsInfo() { + List newsInfoList = newsInfoProviderPort.fetchAll(); + return newsInfoIndexRepositoryPort.saveAll(newsInfoList); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java index 81550e9..9cd3def 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java @@ -6,6 +6,11 @@ import lombok.Getter; import lombok.RequiredArgsConstructor; +/** + * 뉴스 정보를 표현하는 도메인 모델 + * + * @since 2025-05-15 + */ @RequiredArgsConstructor @Getter @Builder diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java new file mode 100644 index 0000000..60db859 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java @@ -0,0 +1,168 @@ +package com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.data.elasticsearch.core.ElasticsearchOperations; +import org.springframework.data.elasticsearch.core.IndexOperations; +import org.springframework.data.elasticsearch.core.IndexedObjectInformation; +import org.springframework.data.elasticsearch.core.RefreshPolicy; +import org.springframework.data.elasticsearch.core.document.Document; +import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates; +import 
org.springframework.data.elasticsearch.core.query.BulkOptions; +import org.springframework.data.elasticsearch.core.query.IndexQuery; +import org.springframework.data.elasticsearch.core.query.IndexQueryBuilder; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoIndexRepositoryPort; +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +import com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter.document.NewsInfoDocument; +import com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter.mapper.NewsInfoDocumentMapper; + +import jakarta.annotation.PostConstruct; + +/** + * Spring Data Elasticsearch를 이용해 뉴스 정보를 Bulk 색인하고 저장된 개수를 반환하는 어댑터 + * + * @since 2025-05-15 + */ +@Component +public class ElasticsearchNewsInfoAdapter implements NewsInfoIndexRepositoryPort { + + private final ElasticsearchOperations esOperations; + private final NewsInfoDocumentMapper mapper; + private final String indexName; + private IndexOperations indexOperations; + + public ElasticsearchNewsInfoAdapter(ElasticsearchOperations esOperations, + NewsInfoDocumentMapper mapper, + @Value("${news.index.name}") String indexName) { + this.esOperations = esOperations; + this.mapper = mapper; + this.indexName = indexName; + } + + /** + * 초기화 단계에서 인덱스를 준비하고 존재하지 않으면 생성한다. + * + * @author 정안식 + * @since 2025-05-15 + */ + @PostConstruct + public void initIndex() { + this.indexOperations = esOperations.indexOps(IndexCoordinates.of(indexName)); + ensureIndexExists(this.indexOperations); + } + + /** + * 뉴스 정보 리스트를 Bulk 색인하고 색인된 개수를 반환한다. + * + * @param newsList 색인할 뉴스 정보 리스트 + * @return 색인된 객체 정보 리스트의 크기 + * @author 정안식 + * @since 2025-05-15 + */ + @Override + public int saveAll(List newsList) { + List queries = toIndexQueries(newsList); + List result = bulkIndex(indexOperations, queries); + + return result.size(); + } + + /** + * 인덱스가 없으면 생성하고 매핑을 설정한다. 
+ * + * @param ops 인덱스 운영 객체 + * @author 정안식 + * @since 2025-05-15 + */ + private void ensureIndexExists(IndexOperations ops) { + try { + if (!ops.exists()) { + ops.create(); + ops.putMapping(Document + .create() + .append("properties", mappingProperties())); + } + } catch (Exception e) { + throw new RuntimeException("Failed to create or map index [" + indexName + "]", e); + } + } + + /** + * 문서 매핑에 사용할 Elasticsearch 프로퍼티 맵을 반환한다. + * + * @return 매핑 프로퍼티 맵 + * @author 정안식 + * @since 2025-05-15 + */ + private Map mappingProperties() { + return Map.of( + NewsInfoDocument.FIELD_ID, Map.of("type", "keyword"), + NewsInfoDocument.FIELD_TITLE, Map.of( + "type", "text", + "analyzer", NewsInfoDocument.ANALYZER_NORI, + "fields", Map.of( + NewsInfoDocument.FIELD_KEYWORD, Map.of("type", "keyword") + ) + ), + NewsInfoDocument.FIELD_CONTENT, Map.of( + "type", "text", + "analyzer", NewsInfoDocument.ANALYZER_NORI, + "fields", Map.of( + NewsInfoDocument.FIELD_KEYWORD, Map.of("type", "keyword") + ) + ), + NewsInfoDocument.FIELD_PUBLISHED_AT, Map.of("type", "date"), + NewsInfoDocument.FIELD_IMAGE_URL, Map.of("type", "keyword"), + NewsInfoDocument.FIELD_CATEGORY, Map.of("type", "keyword") + ); + } + + /** + * 도메인 객체를 Elasticsearch 색인 쿼리로 변환한다. + * + * @param newsList 도메인 객체 리스트 + * @return 색인 쿼리 리스트 + * @author 정안식 + * @since 2025-05-15 + */ + private List toIndexQueries(List newsList) { + return newsList.stream() + .map(n -> new IndexQueryBuilder() + .withId(n.getNewsId()) + .withObject(mapper.toDocument(n)) + .build()) + .collect(Collectors.toList()); + } + + /** + * Bulk 옵션을 사용해 쿼리를 실행하고 결과 정보를 반환한다. 
+ * + * @param indexOperations 인덱스 운영 객체 + * @param queries 색인 쿼리 리스트 + * @return 색인 결과 객체 정보 리스트 + * @author 정안식 + * @since 2025-05-15 + */ + private List bulkIndex(IndexOperations indexOperations, + List queries) { + + try { + BulkOptions bulkOptions = BulkOptions.builder() + .withRefreshPolicy(RefreshPolicy.NONE) + .build(); + + return esOperations.bulkIndex( + queries, + bulkOptions, + indexOperations.getIndexCoordinates() + ); + } catch (Exception e) { + throw new RuntimeException("Failed to bulk index documents into [" + indexName + "]", e); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java new file mode 100644 index 0000000..02319a5 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java @@ -0,0 +1,33 @@ +package com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter.document; + +import java.time.LocalDateTime; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +/** + * Elasticsearch에 저장될 뉴스 정보 문서 모델 클래스 + * + * @since 2025-05-15 + */ +@Getter +@AllArgsConstructor +public class NewsInfoDocument { + private final String newsId; + private final String title; + private final String content; + private final LocalDateTime publishedAt; + private final String imageUrl; + private final String category; + + public static final String FIELD_ID = "newsId"; + public static final String FIELD_TITLE = "title"; + public static final String FIELD_CONTENT = "content"; + public static final String FIELD_PUBLISHED_AT = "publishedAt"; + public static final String FIELD_IMAGE_URL = "imageUrl"; + public static final String FIELD_CATEGORY = "category"; + + public static final String ANALYZER_NORI = "nori"; + public static final String FIELD_KEYWORD = "keyword"; +} + diff 
--git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java new file mode 100644 index 0000000..9ae013a --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java @@ -0,0 +1,34 @@ +package com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter.mapper; + +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +import com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter.document.NewsInfoDocument; + +/** + * 도메인 모델 NewsInfo를 Elasticsearch 문서 모델로 변환하는 매퍼 + * + * @since 2025-05-15 + */ +@Component +public class NewsInfoDocumentMapper { + /** + * NewsInfo 도메인 객체를 NewsInfoDocument로 변환한다. + * + * @param news 변환할 도메인 객체 + * @return 변환된 문서 객체 + * @author 정안식 + * @since 2025-05-15 + */ + public NewsInfoDocument toDocument(NewsInfo news) { + return new NewsInfoDocument( + news.getNewsId(), + news.getTitle(), + news.getContent(), + news.getPublishedAt(), + news.getImageUrl(), + news.getCategory() + ); + } +} + diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/presentation/controller/NewsIndexController.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/presentation/controller/NewsIndexController.java new file mode 100644 index 0000000..563c25d --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/presentation/controller/NewsIndexController.java @@ -0,0 +1,41 @@ +package com.likelion.backendplus4.talkpick.batch.index.presentation.controller; + +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import 
org.springframework.web.bind.annotation.RestController; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; +import com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse; +import com.likelion.backendplus4.talkpick.batch.index.application.port.in.NewsIndexUseCase; + +import lombok.RequiredArgsConstructor; + +/** + * 뉴스 데이터 일괄 색인을 위한 REST 컨트롤러 + * + * @since 2025-05-15 + */ +@RestController +@RequestMapping("/news") +@RequiredArgsConstructor +public class NewsIndexController { + + private final NewsIndexUseCase indexUseCase; + + /** + * 전체 뉴스 정보를 색인하고 처리 건수를 반환한다. + * + * @return ApiResponse에 래핑된 색인된 뉴스 건수 + * @author 정안식 + * @since 2025-05-15 + */ + @EntryExitLog + @TimeTracker + @PostMapping("/index") + public ResponseEntity> indexAllNews() { + int count = indexUseCase.indexAllNewsInfo(); + return ApiResponse.success(count); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java new file mode 100644 index 0000000..54470fe --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java @@ -0,0 +1,53 @@ +package com.likelion.backendplus4.talkpick.batch.sample.index; + +import java.time.LocalDateTime; +import java.util.List; + +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogMethodValues; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; +import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; + 
+import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Component +public class NewsIndexServiceTestImpl implements NewsInfoProviderPort{ + + @EntryExitLog + @TimeTracker + @LogMethodValues + @Override + public List fetchAll() { + return List.of( + new NewsInfo( + "news-1", + "테스트 뉴스 1", + "첫 번째 테스트 뉴스의 내용입니다.", + LocalDateTime.of(2025, 5, 14, 10, 0), + "https://example.com/image1.jpg", + "테스트" + ), + new NewsInfo( + "news-2", + "테스트 뉴스 2", + "두 번째 테스트 뉴스의 내용입니다.", + LocalDateTime.of(2025, 5, 13, 11, 30), + "https://example.com/image2.jpg", + "테스트" + ), + new NewsInfo( + "news-3", + "테스트 뉴스 3", + "세 번째 테스트 뉴스의 내용입니다.", + LocalDateTime.of(2025, 5, 12, 14, 45), + "https://example.com/image3.jpg", + "테스트" + ) + ); + } +} + diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index ca0293e..af087ae 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -5,6 +5,8 @@ server: enabled: false spring: + elasticsearch: + uris: ${ELS_URI} ai: openai: api-key: ${OPENAI_API_KEY} @@ -52,3 +54,7 @@ decorator: article-collector: quartz: cron: "0 */1 * * * ?" 
+ +news: + index: + name: news_index From feabc6804db93fefd1712492c2ba099369155777 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Thu, 15 May 2025 19:00:45 +0900 Subject: [PATCH 23/36] =?UTF-8?q?=F0=9F=90=9B=20Fix:=20=EC=BB=B4=ED=8C=8C?= =?UTF-8?q?=EC=9D=BC=20=EC=98=A4=EB=A5=98=20=ED=95=B4=EA=B2=B0=20(static?= =?UTF-8?q?=20=EB=B3=80=EC=88=98=20=EC=9C=84=EC=B9=98=20=EC=88=98=EC=A0=95?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../infrastructure/jpa/adapter/NewsInfoProviderAdapter.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java index 57fe50b..eb138fb 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java @@ -23,7 +23,8 @@ */ @Component @RequiredArgsConstructor -public class NewsInfoProviderAdapter implements NewsInfoProviderPort { +public class NewsInfoProviderAdapter implements NewsInfoProviderPort + private static final int MAX_NEWS_COUNT = 100; private final NewsInfoJpaRepository newsInfoJpaRepository; /** @@ -35,8 +36,6 @@ public class NewsInfoProviderAdapter implements NewsInfoProviderPort { */ @Override public List fetchAll() { - private static final int MAX_NEWS_COUNT = 100; - Pageable pageable = PageRequest.of(0, MAX_NEWS_COUNT) .withSort(Sort.by("pubDate").descending()); From 9438a9384df783a02213de8ee93659a9323ce088 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Thu, 15 May 2025 19:05:24 +0900 Subject: [PATCH 24/36] =?UTF-8?q?=F0=9F=90=9B=20Fix:=20=EC=BB=B4=ED=8C=8C?= 
=?UTF-8?q?=EC=9D=BC=20=EC=98=A4=EB=A5=98=20=EB=AC=B8=EC=A0=9C=20=ED=95=B4?= =?UTF-8?q?=EA=B2=B0=20(=EC=A4=91=EA=B4=84=ED=98=B8=20=EC=98=A4=ED=83=80?= =?UTF-8?q?=20=EC=88=98=EC=A0=95)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../infrastructure/jpa/adapter/NewsInfoProviderAdapter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java index eb138fb..3e5d521 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java @@ -23,7 +23,7 @@ */ @Component @RequiredArgsConstructor -public class NewsInfoProviderAdapter implements NewsInfoProviderPort +public class NewsInfoProviderAdapter implements NewsInfoProviderPort { private static final int MAX_NEWS_COUNT = 100; private final NewsInfoJpaRepository newsInfoJpaRepository; From cc0cb46f6cfbbde36c0b13ca75cce5965f22e8eb Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Thu, 15 May 2025 19:09:00 +0900 Subject: [PATCH 25/36] =?UTF-8?q?=F0=9F=90=9B=20Fix:=20=20=EC=BB=B4?= =?UTF-8?q?=ED=8C=8C=EC=9D=BC=20=EC=98=A4=EB=A5=98=20=EC=88=98=EC=A0=95=20?= =?UTF-8?q?(=ED=81=B4=EB=9E=98=EC=8A=A4=20=EC=9D=B4=EB=A6=84=20=EB=B3=80?= =?UTF-8?q?=EA=B2=BD=EC=97=90=20=EB=94=B0=EB=A5=B8=20=EC=88=98=EC=A0=95)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../collector/processor/RssFeedReader.java | 286 +++++++++--------- 1 file changed, 143 insertions(+), 143 deletions(-) diff --git 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java index 2c05603..128c028 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java @@ -17,7 +17,7 @@ import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; -import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.RssNewsRepository; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; import com.rometools.rome.feed.synd.SyndEntry; import com.rometools.rome.feed.synd.SyndFeed; import com.rometools.rome.io.SyndFeedInput; @@ -35,145 +35,145 @@ @Slf4j @Component public class RssFeedReader { - private final RssNewsRepository rssNewsRepository; - private static final Map lastProcessedDateMap = new ConcurrentHashMap<>(); - - @Autowired - public RssFeedReader(RssNewsRepository rssNewsRepository) { - this.rssNewsRepository = rssNewsRepository; - } - - /** - * 주어진 피드 URL로부터 RSS 피드를 파싱하고, 최신 발행일 이후의 {@link SyndEntry} 리스트를 반환한다. 
- * - * @param feedUrl RSS 피드의 URL 문자열 - * @param mapperType 매퍼 타입 (언론사 코드) - * @return 파싱 및 필터링된 SyndEntry 목록 - * @modified 2025-05-18 최신 발행일 이후 데이터만 필터링하는 기능 추가 - * @author 함예정 - * @since 2025-05-10 - */ - public List getFeed(String feedUrl, String mapperType) { - URL url = getURL(feedUrl); - URLConnection connection = openConnectionWithTimeout(url); - List entries = parseRssEntries(connection); - - LocalDateTime latestPubDate = getLatestPubDate(mapperType); - - List filteredEntries = entries.stream() - .filter(entry -> isAfterLatestPubDate(entry, latestPubDate)) - .collect(Collectors.toList()); - - return filteredEntries; - } - - /** - * 언론사별 최신 발행일 조회 (캐싱 추가) - * - * @param mapperType 매퍼 타입 (언론사 코드) - * @return 최신 발행일 또는 기본값 - */ - private LocalDateTime getLatestPubDate(String mapperType) { - String guidPrefix = mapperType.toUpperCase(); - - LocalDateTime latestPubDate = rssNewsRepository.findLatestPubDateByGuidPrefix(guidPrefix); - - if (latestPubDate == null) { - latestPubDate = getDefaultPubDate(); - } - - lastProcessedDateMap.put(mapperType, latestPubDate); - return latestPubDate; - } - - private LocalDateTime getDefaultPubDate() { - LocalDateTime latestPubDate = LocalDateTime.now().minusDays(1); - return latestPubDate; - } - - /** - * 항목의 발행일이 최신 발행일보다 이후인지 확인 - * - * @param entry RSS 항목 - * @param latestPubDate 최신 발행일 - * @return 최신 발행일 이후면 true - */ - private boolean isAfterLatestPubDate(SyndEntry entry, LocalDateTime latestPubDate) { - if (entry.getPublishedDate() == null) { - log.debug("발행일 없음 - 항목 제외: {}", entry.getTitle()); - return false; - } - - LocalDateTime pubDate = convertToLocalDateTime(entry.getPublishedDate()); - - boolean isAfter = pubDate.isAfter(latestPubDate); - - return isAfter; - } - - /** - * Date 객체를 LocalDateTime으로 변환 - * - * @param date 변환할 Date 객체 - * @return 변환된 LocalDateTime - */ - private LocalDateTime convertToLocalDateTime(Date date) { - return date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); - } - - /** - * 
문자열 형태의 URL을 {@link URL} 객체로 변환한다. - * - * @param feedUrl 문자열 형태의 URL - * @return URL 객체 - * @throws RuntimeException 유효하지 않은 URL 형식일 경우 - * @author 함예정 - * @since 2025-05-10 - */ - private URL getURL(String feedUrl) { - try { - return new URL(feedUrl); - } catch (MalformedURLException e) { - throw new RuntimeException(e); - } - } - - /** - * 지정된 URL에 대해 연결 타임아웃과 읽기 타임아웃을 설정한 후 URLConnection을 반환합니다. - * - * @param url 연결할 URL 객체 - * @return 설정된 타임아웃을 가진 URLConnection 객체 - * @throws RuntimeException 연결 중 IOException이 발생할 경우 런타임 예외로 래핑하여 던짐 - * @author 함예정 - * @since 2025-05-12 - */ - private URLConnection openConnectionWithTimeout(URL url) { - try { - URLConnection connection = url.openConnection(); - connection.setConnectTimeout(3000); - connection.setReadTimeout(5000); - return connection; - } catch (IOException e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_CONNECTION_ERROR, e); - } - } - - /** - * 주어진 URLConnection으로부터 RSS 피드를 읽어 SyndEntry 목록으로 파싱합니다. - * - * @param connection RSS 피드를 제공하는 URLConnection 객체 - * @return 파싱된 SyndEntry 객체 리스트 - * @throws ArticleCollectorException RSS 피드 파싱 중 오류가 발생한 경우 사용자 정의 예외로 래핑하여 던짐 - * @author 함예정 - * @since 2025-05-12 - */ - private List parseRssEntries(URLConnection connection) { - try (XmlReader reader = new XmlReader(connection)) { - SyndFeedInput input = new SyndFeedInput(); - SyndFeed syndFeed = input.build(reader); - return syndFeed.getEntries(); - } catch (Exception e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); - } - } -} \ No newline at end of file + private static final Map lastProcessedDateMap = new ConcurrentHashMap<>(); + private final NewsInfoJpaRepository rssNewsRepository; + + @Autowired + public RssFeedReader(NewsInfoJpaRepository rssNewsRepository) { + this.rssNewsRepository = rssNewsRepository; + } + + /** + * 주어진 피드 URL로부터 RSS 피드를 파싱하고, 최신 발행일 이후의 {@link SyndEntry} 리스트를 반환한다. 
+ * + * @param feedUrl RSS 피드의 URL 문자열 + * @param mapperType 매퍼 타입 (언론사 코드) + * @return 파싱 및 필터링된 SyndEntry 목록 + * @modified 2025-05-18 최신 발행일 이후 데이터만 필터링하는 기능 추가 + * @author 함예정 + * @since 2025-05-10 + */ + public List getFeed(String feedUrl, String mapperType) { + URL url = getURL(feedUrl); + URLConnection connection = openConnectionWithTimeout(url); + List entries = parseRssEntries(connection); + + LocalDateTime latestPubDate = getLatestPubDate(mapperType); + + List filteredEntries = entries.stream() + .filter(entry -> isAfterLatestPubDate(entry, latestPubDate)) + .collect(Collectors.toList()); + + return filteredEntries; + } + + /** + * 언론사별 최신 발행일 조회 (캐싱 추가) + * + * @param mapperType 매퍼 타입 (언론사 코드) + * @return 최신 발행일 또는 기본값 + */ + private LocalDateTime getLatestPubDate(String mapperType) { + String guidPrefix = mapperType.toUpperCase(); + + LocalDateTime latestPubDate = rssNewsRepository.findLatestPubDateByGuidPrefix(guidPrefix); + + if (latestPubDate == null) { + latestPubDate = getDefaultPubDate(); + } + + lastProcessedDateMap.put(mapperType, latestPubDate); + return latestPubDate; + } + + private LocalDateTime getDefaultPubDate() { + LocalDateTime latestPubDate = LocalDateTime.now().minusDays(1); + return latestPubDate; + } + + /** + * 항목의 발행일이 최신 발행일보다 이후인지 확인 + * + * @param entry RSS 항목 + * @param latestPubDate 최신 발행일 + * @return 최신 발행일 이후면 true + */ + private boolean isAfterLatestPubDate(SyndEntry entry, LocalDateTime latestPubDate) { + if (entry.getPublishedDate() == null) { + log.debug("발행일 없음 - 항목 제외: {}", entry.getTitle()); + return false; + } + + LocalDateTime pubDate = convertToLocalDateTime(entry.getPublishedDate()); + + boolean isAfter = pubDate.isAfter(latestPubDate); + + return isAfter; + } + + /** + * Date 객체를 LocalDateTime으로 변환 + * + * @param date 변환할 Date 객체 + * @return 변환된 LocalDateTime + */ + private LocalDateTime convertToLocalDateTime(Date date) { + return date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); + } + + /** + * 
문자열 형태의 URL을 {@link URL} 객체로 변환한다. + * + * @param feedUrl 문자열 형태의 URL + * @return URL 객체 + * @throws RuntimeException 유효하지 않은 URL 형식일 경우 + * @author 함예정 + * @since 2025-05-10 + */ + private URL getURL(String feedUrl) { + try { + return new URL(feedUrl); + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + } + + /** + * 지정된 URL에 대해 연결 타임아웃과 읽기 타임아웃을 설정한 후 URLConnection을 반환합니다. + * + * @param url 연결할 URL 객체 + * @return 설정된 타임아웃을 가진 URLConnection 객체 + * @throws RuntimeException 연결 중 IOException이 발생할 경우 런타임 예외로 래핑하여 던짐 + * @author 함예정 + * @since 2025-05-12 + */ + private URLConnection openConnectionWithTimeout(URL url) { + try { + URLConnection connection = url.openConnection(); + connection.setConnectTimeout(3000); + connection.setReadTimeout(5000); + return connection; + } catch (IOException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_CONNECTION_ERROR, e); + } + } + + /** + * 주어진 URLConnection으로부터 RSS 피드를 읽어 SyndEntry 목록으로 파싱합니다. + * + * @param connection RSS 피드를 제공하는 URLConnection 객체 + * @return 파싱된 SyndEntry 객체 리스트 + * @throws ArticleCollectorException RSS 피드 파싱 중 오류가 발생한 경우 사용자 정의 예외로 래핑하여 던짐 + * @author 함예정 + * @since 2025-05-12 + */ + private List parseRssEntries(URLConnection connection) { + try (XmlReader reader = new XmlReader(connection)) { + SyndFeedInput input = new SyndFeedInput(); + SyndFeed syndFeed = input.build(reader); + return syndFeed.getEntries(); + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); + } + } +} From f473766560e1a6bc53baa8d78637569fe2630215 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sat, 17 May 2025 12:42:06 +0900 Subject: [PATCH 26/36] =?UTF-8?q?=F0=9F=90=9B=20Fix:=20=EC=BB=B4=ED=8C=8C?= =?UTF-8?q?=EC=9D=BC=20=EC=98=A4=EB=A5=98=20=EC=88=98=EC=A0=95=20(#67)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../infrastructure/collector/processor/RssFeedReader.java | 
2 +- .../infrastructure/jpa/adapter/NewsInfoProviderAdapter.java | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java index 128c028..b717ad2 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java @@ -176,4 +176,4 @@ private List parseRssEntries(URLConnection connection) { throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); } } -} +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java index 3e5d521..d4c1113 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java @@ -2,6 +2,7 @@ import java.util.List; +import org.springframework.context.annotation.Primary; import org.springframework.data.domain.PageRequest; import org.springframework.data.domain.Pageable; import org.springframework.data.domain.Sort; @@ -23,6 +24,7 @@ */ @Component @RequiredArgsConstructor +@Primary public class NewsInfoProviderAdapter implements NewsInfoProviderPort { private static final int MAX_NEWS_COUNT = 100; private final NewsInfoJpaRepository newsInfoJpaRepository; From d102a86a74d9f1533e83c63e06f9c706b648195e Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" 
Date: Sat, 17 May 2025 23:19:45 +0900 Subject: [PATCH 27/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20#39=20=EB=A0=88?= =?UTF-8?q?=ED=8F=AC=EC=A7=80=ED=86=A0=EB=A6=AC=20issue=20=EB=B0=8F=20pr?= =?UTF-8?q?=20=ED=85=9C=ED=94=8C=EB=A6=BF=20=EC=82=AD=EC=A0=9C=20(?= =?UTF-8?q?=EC=A1=B0=EC=A7=81=20=ED=85=9C=ED=94=8C=EB=A6=BF=20=EC=82=AC?= =?UTF-8?q?=EC=9A=A9)=20(#69)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/ISSUE_TEMPLATE/config.yml | 6 --- .github/ISSUE_TEMPLATE/epic.yml | 59 ------------------------ .github/ISSUE_TEMPLATE/feature.yml | 31 ------------- .github/ISSUE_TEMPLATE/tech-debt.yml | 60 ------------------------- .github/ISSUE_TEMPLATE/user-story.yml | 35 --------------- .github/PULL_REQUEST_TEMPLATE/bugfix.md | 46 ------------------- .github/pull_request_template.md | 31 ------------- 7 files changed, 268 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/config.yml delete mode 100644 .github/ISSUE_TEMPLATE/epic.yml delete mode 100644 .github/ISSUE_TEMPLATE/feature.yml delete mode 100644 .github/ISSUE_TEMPLATE/tech-debt.yml delete mode 100644 .github/ISSUE_TEMPLATE/user-story.yml delete mode 100644 .github/PULL_REQUEST_TEMPLATE/bugfix.md delete mode 100644 .github/pull_request_template.md diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index c2b4247..0000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,6 +0,0 @@ -blank_issues_enabled: false - -contact_links: - - name: 아이디어 토론·Q&A는 여기로 - url: https://github.com/talkpick/talkpick-batch/discussions - about: 아이디어 토론·Q&A는 Discussions 탭을 이용해 주세요. 
diff --git a/.github/ISSUE_TEMPLATE/epic.yml b/.github/ISSUE_TEMPLATE/epic.yml deleted file mode 100644 index 96ab208..0000000 --- a/.github/ISSUE_TEMPLATE/epic.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: "✨ Epic" -description: 큰 기능(에픽) 카드 – 하위 사용자 스토리를 모으는 부모 이슈 -title: "[Epic] " -labels: [epic] -assignees: [] - -body: - - type: textarea - id: goal - attributes: - label: 🎯 목표(Problem / Goal) - description: "이 에픽이 해결하려는 문제 또는 달성할 목표를 간략히 서술하세요." - placeholder: | - 예) “외부 인증 없이도 원‑클릭 로그인 기능을 제공하여, 신규 유저 유입 장벽을 낮춘다.” - validations: - required: true - - - type: textarea - id: background - attributes: - label: 📝 배경 / 맥락 - description: 설계 근거, 관련 문서·링크 등이 있으면 적어 주세요. - placeholder: | - - Figma UX 흐름: https://figma.com/… - - OAuth 2.0 Sequence 다이어그램 - validations: - required: false - - - type: textarea - id: scope - attributes: - label: 📋 범위 – 예상 하위 작업 체크리스트 - description: "*하위 Issue 를 추가하면 자동으로 갱신되므로 초안만 작성해도 됩니다.*" - placeholder: | - - [ ] UI 버튼 디자인 - - [ ] OAuth 동의 화면 호출 - - [ ] 신규 사용자 DB 저장 - render: markdown - validations: - required: false - - - type: textarea - id: dod - attributes: - label: ✅ 완료 기준(Definition of Done) - description: "에픽이 ‘Done’ 으로 이동되기 위한 최소 조건을 적으세요." - placeholder: | - - 모든 하위 Issue 완료 - - Dev/Staging 배포 확인 - - 릴리스 노트 작성 - validations: - required: true - - - type: markdown - attributes: - value: | - 🔗 **하위 이슈 연결 방법** - - 새 이슈를 만들 때 본문 맨 위에 `parent: #<이 Epic 번호>`를 쓰거나 - - Projects 보드에서 “Add child issue” 버튼을 누르세요. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature.yml b/.github/ISSUE_TEMPLATE/feature.yml deleted file mode 100644 index 9f46135..0000000 --- a/.github/ISSUE_TEMPLATE/feature.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: "✨ Feature" -description: "새로운 기능 추가" -labels: ["✨ Feature"] -projects: ["yakplus/1"] -body: - - type: textarea - attributes: - label: 📄 설명 - description: 새로운 기능에 대한 설명을 작성해 주세요. - placeholder: 자세히 적을수록 좋습니다! 
- validations: - required: true - - type: textarea - attributes: - label: ✅ 작업할 내용 - description: 할 일을 체크박스 형태로 작성해주세요. - placeholder: | - 🫧 - - [ ] <체크 1> - - [ ] <체크 2> - value: | - - [ ] <작업 1> - - [ ] <작업 2> - - [ ] <작업 3> - - [ ] 더입력 ... - validations: - required: true - - type: textarea - attributes: - label: 🙋🏻 참고 자료 - description: 참고 자료가 있다면 작성해 주세요. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/tech-debt.yml b/.github/ISSUE_TEMPLATE/tech-debt.yml deleted file mode 100644 index a6add2d..0000000 --- a/.github/ISSUE_TEMPLATE/tech-debt.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: "🛠️ Tech Debt / Refactor" -description: 리팩터링·성능 개선·보안 패치 등 기술 부채 상환 이슈 -title: "[Tech Debt] " -labels: [tech-debt] -assignees: [] - -body: - - type: textarea - id: area - attributes: - label: 📍 개선 대상(모듈·파일·클래스) - placeholder: | - 예) NewsController 의 300라인 이상 메소드 분리 - validations: - required: true - - - type: textarea - id: problem - attributes: - label: 🔎 현재 문제점 - description: "왜 부채가 되었는지, 어떤 위험·불편이 있는지 작성해 주세요." - placeholder: | - - 메소드 길이가 200라인을 넘어 가독성이 떨어집니다. - - 동일 로직이 Service 레이어에도 중복되어 유지보수가 어렵습니다. - validations: - required: true - - - type: textarea - id: proposal - attributes: - label: 💡 개선 방안(선택) - placeholder: | - - 메소드 분리 + 공통 로직 Utility 이동 - - 단위 테스트 추가 - validations: - required: false - - - type: dropdown - id: priority - attributes: - label: ⏰ 우선순위 - description: "해결 시기를 가늠할 수 있도록 심각도를 선택해 주세요." 
- options: - - high (즉시 상환: 다음 스프린트 포함) - - medium (2~3 스프린트 내) - - low (시간 날 때) - validations: - required: true - - - type: checkboxes - id: impact - attributes: - label: 기대 효과(복수 선택) - options: - - label: 읽기 쉬운 코드 - - label: 성능 개선 - - label: 버그 위험 감소 - - label: 보안 강화 - - label: 테스트 용이성 향상 - - label: 기타 (추가 건의) \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/user-story.yml b/.github/ISSUE_TEMPLATE/user-story.yml deleted file mode 100644 index 3e22402..0000000 --- a/.github/ISSUE_TEMPLATE/user-story.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: "🗂️ 사용자 스토리" -description: 개발 작업 카드 (INVEST 형식) -title: "" -labels: [] -assignees: [] - -body: - - type: textarea - id: story - attributes: - label: 사용자 스토리 - description: "형식: **[역할]**로서 **[목표]**를 하고 싶다. 그 결과 **[이점]**을 얻는다." - placeholder: | - 예) 방문자로서 키워드로 뉴스를 검색하고 싶다. 그 결과 관련 기사를 빠르게 찾을 수 있다. - validations: - required: true - - - type: textarea - id: ac - attributes: - label: 수락 기준(Acceptance Criteria) - description: "동작 완료 조건 2~5개 (가능하면 Given‑When‑Then)." - placeholder: | - - [ ] 검색 결과는 0.5초 이내 반환된다 - - [ ] 결과가 없으면 '결과가 없습니다' 토스트가 표시된다 - validations: - required: true - - - type: checkboxes - id: dor - attributes: - label: Definition of Ready 체크 - options: - - label: 설명·목표가 명확하다 - - label: 수락 기준이 작성되었다 \ No newline at end of file diff --git a/.github/PULL_REQUEST_TEMPLATE/bugfix.md b/.github/PULL_REQUEST_TEMPLATE/bugfix.md deleted file mode 100644 index 66ef3a1..0000000 --- a/.github/PULL_REQUEST_TEMPLATE/bugfix.md +++ /dev/null @@ -1,46 +0,0 @@ - - -## 🐞 버그 설명 - - -## 🔍 원인(Root Cause) - - -## 🛠️ 수정(Implemented Changes) - - -## ✅ 테스트 방법 -1. [ ] `/login` 페이지 접근 -2. [ ] ‘GitHub로 로그인’ 클릭 → OAuth 동의 → 메인 화면 이동 -3. [ ] 토큰 만료 후 재시도 시 로그인 페이지로 정상 리다이렉트 - -> **CI 통합 테스트**도 포함되어야 합니다. 
- -## 📸 스크린샷 / 동영상(선택) - - ---- - -### 체크리스트 -- [ ] 관련 이슈 링크: `Fixes #` -- [ ] 단위 / 통합 테스트 통과 -- [ ] 코드리뷰 2인 승인 예정 -- [ ] Dev 환경 배포 확인 - - diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index 17374f2..0000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1,31 +0,0 @@ -## 📌 PR 유형 (해당하는 항목에 모두 체크해주세요) -- [ ] Feat: 새로운 기능 추가 -- [ ] Fix: 버그 수정 -- [ ] Docs: 문서 수정 -- [ ] Style: 코드 포맷팅, 세미콜론 누락, 코드 변경이 없는 경우 -- [ ] Refactor: 코드 리팩토링 (기능 변경 없이 구조 개선) -- [ ] Test: 테스트 코드 추가 및 기존 테스트 리팩토링 -- [ ] Chore: 빌드 설정, 패키지 매니저 설정 등 기타 변경 -- [ ] Github: PR 템플릿, 이슈 템플릿, Github Actions 설정 등 -- [ ] Conflict: 머지 시 충돌 해결 - - -## ✨ 변경 사항 -- 이 PR에서 어떤 작업을 했는지 요약해주세요. -- 주요 변경 사항, 기능, 개선 내용을 자세히 작성해주세요. - - -## 🔍 리뷰어에게 -- 리뷰어가 집중해서 봐야 할 포인트가 있다면 알려주세요. -- 추가 설명이 필요한 부분이 있다면 작성해주세요. - - -## ✅ PR 체크리스트 -- [ ] 커밋 메시지를 컨벤션에 맞게 작성했습니다. -- [ ] 변경 사항을 로컬에서 테스트했습니다. -- [ ] 관련 라벨을 선택했습니다. - - -## 🔗 관련 이슈 -- 이 PR과 연결된 이슈가 있다면 번호를 적어주세요. -- `closed #이슈번호` 형태로 적으면 머지 시 자동으로 이슈가 닫힙니다. 
\ No newline at end of file From d95ad87b428e00ac1adb19ff80fd86a90638c301 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sun, 18 May 2025 18:17:13 +0900 Subject: [PATCH 28/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#70=20=EB=82=B4?= =?UTF-8?q?=EC=9A=A9=20=EC=9A=94=EC=95=BD=20=EC=B6=94=EA=B0=80=20(#71)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📦 Chore: 임베딩 패키지 이동(news 내 인프라) * ✨ Feature: OpenAi 내용 요약 요청 및 기록 Job 추가 * ♻️ Refactor: Partitioner 공통 사용 클래스로 수정 * ✨ Feature: Entity에 내용 요약 Vector 필드 추가 * ✨ Feature: embedding item reader 추가 * ✨ Feature: Embedding할 item List 페이지 수 계산 추가 * ✨ Feature: 임베딩 processor 추가 * 📦 Chore: 주석 추가 * ✨ Feature: 임베딩 writer 추가 * ✨ Feature: 뉴스 임베딩 작업 추가 * 📦 Chore: 주석 추가 * 💄 Style: 코드 컨벤션 라인 정리 * ♻️ Refactor: 임베딩 예외 처리 수정 * ♻️ Refactor: Partitioner 계산 로직 책임 분리 --- .../configuration/openai/OpenaiConfig.java | 35 +++++- .../batch/embedding/EmbeddingPort.java | 13 -- .../config/executor/BatchJobExecutor.java | 8 +- ...fig.java => CollectorQuartzJobConfig.java} | 7 +- .../quartz/CollectorQuartzTriggerConfig.java | 49 ++++++++ .../config/quartz/QuartzTriggerConfig.java | 49 -------- .../config/ArticleEmbeddingJobConfig.java | 52 ++++++++ .../config/ArticleEmbeddingStepConfig.java | 101 ++++++++++++++++ .../batch}/exception/EmbeddingException.java | 2 +- .../exception/error/EmbeddingErrorCode.java | 6 +- .../ArticleEmbeddingPartitioner.java | 97 +++++++++++++++ .../processor/ArticleEmbeddingProcessor.java} | 56 ++++++--- .../reader/ArticleEmbeddingPageReader.java | 42 +++++++ .../batch/writer/ArticleEmbeddingWriter.java | 21 ++++ .../config/ArticleEmbeddingJobExecutor.java | 76 ++++++++++++ .../ArticleEmbeddingQuartzJobConfig.java | 40 +++++++ .../ArticleEmbeddingQuartzTriggerConfig.java | 57 +++++++++ .../converter/FloatArrayToJsonConverter.java | 100 ++++++++++++++++ .../exception/JpaConvertorException.java | 31 +++++ .../error/JpaConvertorErrorCode.java | 55 +++++++++ 
.../jpa/entity/ArticleEntity.java | 102 ++++++++++------ .../jpa/repository/NewsInfoJpaRepository.java | 22 ++-- .../batch/config/SummaryJobConfig.java | 47 ++++++++ .../batch/config/SummaryStepConfig.java | 98 ++++++++++++++++ .../exception/ArticleSummaryException.java | 23 ++++ .../error/ArticleSummaryErrorCode.java | 55 +++++++++ .../ArticleSummaryPartitioner.java | 111 ++++++++++++++++++ .../processor/ArticleSummaryProcessor.java | 53 +++++++++ .../reader/ArticleSummaryPageReader.java | 57 +++++++++ .../batch/writer/ArticleSummaryWriter.java | 34 ++++++ .../IdRangePartitionCalculator.java | 37 ++++++ .../partitioner/PartitionMapBuilder.java | 23 ++++ .../partitioner/dto/ArticleIdRange.java | 8 ++ src/main/resources/application.yml | 30 +++-- 34 files changed, 1444 insertions(+), 153 deletions(-) delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/EmbeddingPort.java rename src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/{QuartzJobConfig.java => CollectorQuartzJobConfig.java} (85%) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzTriggerConfig.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingJobConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingStepConfig.java rename src/main/java/com/likelion/backendplus4/talkpick/batch/{embedding => news/article/infrastructure/embedding/batch}/exception/EmbeddingException.java (84%) rename src/main/java/com/likelion/backendplus4/talkpick/batch/{embedding => 
news/article/infrastructure/embedding/batch}/exception/error/EmbeddingErrorCode.java (85%) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/partitioner/ArticleEmbeddingPartitioner.java rename src/main/java/com/likelion/backendplus4/talkpick/batch/{embedding/OpenAIEmbeddingAdapter.java => news/article/infrastructure/embedding/batch/processor/ArticleEmbeddingProcessor.java} (52%) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/reader/ArticleEmbeddingPageReader.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/writer/ArticleEmbeddingWriter.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingJobExecutor.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzJobConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzTriggerConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/FloatArrayToJsonConverter.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/JpaConvertorException.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/error/JpaConvertorErrorCode.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryJobConfig.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryStepConfig.java 
create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/ArticleSummaryException.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/error/ArticleSummaryErrorCode.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/partitioner/ArticleSummaryPartitioner.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/processor/ArticleSummaryProcessor.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/reader/ArticleSummaryPageReader.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/writer/ArticleSummaryWriter.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/IdRangePartitionCalculator.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/PartitionMapBuilder.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/dto/ArticleIdRange.java diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java index d65b0f1..f3a5f4d 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java @@ -1,5 +1,8 @@ package com.likelion.backendplus4.talkpick.batch.common.configuration.openai; +import org.springframework.ai.chat.client.ChatClient; +import 
org.springframework.ai.openai.OpenAiChatModel; +import org.springframework.ai.openai.OpenAiChatOptions; import org.springframework.ai.openai.api.OpenAiApi; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; @@ -12,8 +15,21 @@ */ @Configuration public class OpenaiConfig { - @Value("${spring.ai.openai.api-key}") - private String apiKey; + private final String apiKey; + private final String chatModelName; + private final Double temperature; + private final Integer maxToken; + + public OpenaiConfig( + @Value("${spring.ai.openai.api-key}") String apiKey, + @Value("${spring.ai.openai.summary.model}") String chatModelName, + @Value("${spring.ai.openai.summary.temperature}") Double temperature, + @Value("${spring.ai.openai.summary.maxCompletionTokens}") Integer maxToken) { + this.apiKey = apiKey; + this.chatModelName = chatModelName; + this.temperature = temperature; + this.maxToken = maxToken; + } /** * OpenAiApi 빈을 생성한다. @@ -26,4 +42,19 @@ public class OpenaiConfig { public OpenAiApi openaiApi() { return new OpenAiApi(apiKey); } + + @Bean + public ChatClient chatClient(OpenAiChatModel chatModel) { + return ChatClient.create(chatModel); + } + + @Bean + public OpenAiChatModel chatModel(OpenAiApi openAiApi) { + OpenAiChatOptions options = OpenAiChatOptions.builder() + .model(chatModelName) + .temperature(temperature) + .maxCompletionTokens(maxToken) + .build(); + return new OpenAiChatModel(openAiApi, options); + } } diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/EmbeddingPort.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/EmbeddingPort.java deleted file mode 100644 index a91aabc..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/EmbeddingPort.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.embedding; - -public interface EmbeddingPort { - - /** - * 주어진 텍스트에 대한 임베딩 벡터를 반환한다. 
- * - * @param text 입력 텍스트 - * @return 텍스트 임베딩 벡터 배열 - * @since 2025-05-11 - */ - float[] getEmbedding(String text); -} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java index c226437..2421b0c 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java @@ -5,11 +5,7 @@ import org.springframework.batch.core.Job; import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.JobParametersBuilder; -import org.springframework.batch.core.JobParametersInvalidException; import org.springframework.batch.core.launch.JobLauncher; -import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; -import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; -import org.springframework.batch.core.repository.JobRestartException; import org.springframework.stereotype.Component; import com.likelion.backendplus4.talkpick.batch.news.article.exception.BatchJobExceptionTranslator; @@ -34,7 +30,7 @@ @RequiredArgsConstructor public class BatchJobExecutor implements org.quartz.Job { private final JobLauncher jobLauncher; - private final Job articleCollectorBatchJob; + private final Job articleCollectJob; private final BatchJobExceptionTranslator batchJobExceptionTranslator; /** @@ -64,7 +60,7 @@ private void startSpringBatchJob() { .toJobParameters(); try { - jobLauncher.run(articleCollectorBatchJob, params); + jobLauncher.run(articleCollectJob, params); } catch (Exception e) { ArticleCollectorErrorCode exceptionCode = batchJobExceptionTranslator.translate(e); throw new 
ArticleCollectorException(exceptionCode); diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzJobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzJobConfig.java similarity index 85% rename from src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzJobConfig.java rename to src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzJobConfig.java index 8cf1f4b..1dd273f 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzJobConfig.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzJobConfig.java @@ -9,20 +9,19 @@ /** * RSS 피드를 정기적으로 수집하는 스케줄 작업 - * application.yml의 rss.scheduler.cron 속성으로 실행 주기 설정 + * application.yml의 spring.quartz.article-collector.cron 속성으로 실행 주기 설정 * 설정이 없을 경우 기본값으로 1분마다 실행 * * 1. 서비스를 거쳐서 피드 수집 * 2. 처리된 항목 수 로깅 * 3. 
예외 발생 시 오류 로깅하고 다음 스케쥴까지 대기 * - * @modified 2025-05-11 Spring Scheduler 에서 Quartz Scheduler 으로 전환 - * @modified 2025-05-10 cron 표현식을 application 으로 분리 + * @modified 2025-05-17 * @since 2025-05-10 최초 작성 * */ @Configuration -public class QuartzJobConfig { +public class CollectorQuartzJobConfig { private final String articleCollectorJobDetailName = "articleCollectorJobDetail"; diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzTriggerConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzTriggerConfig.java new file mode 100644 index 0000000..394c76e --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzTriggerConfig.java @@ -0,0 +1,49 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.quartz; + +import org.quartz.CronScheduleBuilder; +import org.quartz.JobDetail; +import org.quartz.Trigger; +import org.quartz.TriggerBuilder; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class CollectorQuartzTriggerConfig { + private final String cronExpression; + private final JobDetail articleCollectorJobDetail; + private final String articleCollectorJobDetailName = "articleCollectorJobDetail"; + + /** + * 생성자 주입을 통해 Cron 표현식을 설정한다. + * + * @param cronExpression RSS 배치 실행 주기를 정의하는 Cron 표현식 + * application.yml에서 spring.quartz.article-collector.cron 값을 로드 합니다. 
+ * @author 함예정 + * @since 2025-05-10 + */ + public CollectorQuartzTriggerConfig(@Value("${spring.quartz.article-collector.cron}") String cronExpression, + JobDetail articleCollectorJobDetail) { + this.cronExpression = cronExpression; + this.articleCollectorJobDetail = articleCollectorJobDetail; + } + + /** + * RSS 수집 Quartz Trigger 빈 등록. + * - forJob: 이 Trigger 가 어떤 Quartz Job 과 연관되어 실행될지를 지정 + * - withIdentity: Scheduler 내에서 이 Trigger 를 고유하게 식별하기 위한 이름 지정 + * - withSchedule: Cron 표현식을 사용하여 실행 주기 설정 + * + * @return RSS 배치 작업용 Trigger 객체 + * @author 함예정 + * @since 2025-05-10 + */ + @Bean + public Trigger rssBatchTrigger() { + return TriggerBuilder.newTrigger() + .forJob(articleCollectorJobDetail) + .withIdentity(articleCollectorJobDetailName + "trigger") + .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression)) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java deleted file mode 100644 index 17577b7..0000000 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/QuartzTriggerConfig.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.quartz; - -import org.quartz.CronScheduleBuilder; -import org.quartz.JobDetail; -import org.quartz.Trigger; -import org.quartz.TriggerBuilder; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; - -@Configuration -public class QuartzTriggerConfig { - private final String cronExpression; - private final JobDetail articleCollectorJobDetail; - private final String articleCollectorJobDetailName = "articleCollectorJobDetail"; - - /** - 
* 생성자 주입을 통해 Cron 표현식을 설정한다. - * - * @param cronExpression RSS 배치 실행 주기를 정의하는 Cron 표현식 - * application.yml에서 article-collector.quartz.cron 값을 로드 합니다. - * @author 함예정 - * @since 2025-05-10 - */ - public QuartzTriggerConfig(@Value("${article-collector.quartz.cron}") String cronExpression, - JobDetail articleCollectorJobDetail) { - this.cronExpression = cronExpression; - this.articleCollectorJobDetail = articleCollectorJobDetail; - } - - /** - * RSS 수집 Quartz Trigger 빈 등록. - * - forJob: 이 Trigger 가 어떤 Quartz Job 과 연관되어 실행될지를 지정 - * - withIdentity: Scheduler 내에서 이 Trigger 를 고유하게 식별하기 위한 이름 지정 - * - withSchedule: Cron 표현식을 사용하여 실행 주기 설정 - * - * @return RSS 배치 작업용 Trigger 객체 - * @author 함예정 - * @since 2025-05-10 - */ - @Bean - public Trigger rssBatchTrigger() { - return TriggerBuilder.newTrigger() - .forJob(articleCollectorJobDetail) - .withIdentity(articleCollectorJobDetailName) - .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression)) - .build(); - } -} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingJobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingJobConfig.java new file mode 100644 index 0000000..ea25f74 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingJobConfig.java @@ -0,0 +1,52 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.config; + +import org.springframework.batch.core.Job; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.job.builder.JobBuilder; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * Spring Batch Job 설정 클래스.

+ * 요약된 뉴스를 OpenAI를 활용해 + * Embedding Vector를 계산하는 + * Batch Job 을 정의하며, 파티셔닝된 Step 을 시작 단계로 구성한다.

+ * + * 이 잡은 {@code articleEmbeddingJob}이라는 이름으로 정의되며, + * {@link Step} 객체는 외부에서 주입받아 사용한다.

+ * + * 해당 Job 은 Quartz 또는 Spring Scheduler 를 통해 주기적으로 실행될 수 있다.

+ * + * @since 2025-05-17 + */ +@Configuration +public class ArticleEmbeddingJobConfig { + private final String jobName = "articleEmbeddingJob"; + private final JobRepository jobRepository; + private final Step articleEmbeddingStep; + + public ArticleEmbeddingJobConfig( + JobRepository jobRepository, + Step articleEmbeddingStep) { + + this.jobRepository = jobRepository; + this.articleEmbeddingStep = articleEmbeddingStep; + } + + /** + * 뉴스 요약 정보를 임베딩 Vector로 계산하는 + * Spring Batch Job Bean을 생성한다. + * 파티셔닝 Step 을 실행하도록 구성한다. + * + * @return 뉴스 임베딩 Job + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Job articleEmbeddingJob() { + return new JobBuilder(jobName, jobRepository) + .start(articleEmbeddingStep) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingStepConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingStepConfig.java new file mode 100644 index 0000000..cdb7cf9 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingStepConfig.java @@ -0,0 +1,101 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.config; + +import org.springframework.batch.core.Step; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.batch.core.step.builder.StepBuilder; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.task.TaskExecutor; +import org.springframework.transaction.PlatformTransactionManager; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.EmbeddingException; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.partitioner.ArticleEmbeddingPartitioner; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.processor.ArticleEmbeddingProcessor; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.reader.ArticleEmbeddingPageReader; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.writer.ArticleEmbeddingWriter; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.exception.JpaConvertorException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +/** + * 뉴스 기사 임베딩 작업을 위한 Spring Batch Step 설정 클래스. + * - 총 페이지 수 계산을 위한 tasklet step + * - 마스터-슬레이브 파티셔닝 기반 임베딩 처리 step 구성 + * + * @since 2025-05-17 + */ +@Configuration +public class ArticleEmbeddingStepConfig { + private final String executorName = "normalExecutor"; + private final int gridSize = 5; + private final int chunkSize = 100; + private final int retryLimit = 3; + private final int skipLimit = 100; + + private final JobRepository jobRepository; + private final PlatformTransactionManager transactionManager; + private final TaskExecutor taskExecutor; + + public ArticleEmbeddingStepConfig( + JobRepository jobRepository, + PlatformTransactionManager platformTransactionManager, + @Qualifier(executorName) TaskExecutor taskExecutor) { + this.jobRepository = jobRepository; + this.transactionManager = platformTransactionManager; + this.taskExecutor = taskExecutor; + } + + /** + * 기사 임베딩 처리를 마스터-슬레이브 구조로 병렬 처리하기 위한 마스터 Step을 생성한다. 
+ * + * @param partitioner 파티셔닝 전략 구현체 + * @param articleEmbeddingSlaveStep 실제 데이터 처리를 수행하는 슬레이브 Step + * @return 마스터 파티셔닝 Step + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Step articleEmbeddingStep( + ArticleEmbeddingPartitioner partitioner, + Step articleEmbeddingSlaveStep) { + + return new StepBuilder("articleEmbeddingStep", jobRepository) + .partitioner("articleEmbeddingSlavePart", partitioner) + .step(articleEmbeddingSlaveStep) + .taskExecutor(taskExecutor) + .gridSize(gridSize) + .build(); + } + + /** + * 한 파티션 내에서 요약된 뉴스 내용을 기준으로 임베딩 벡터를 생성하고 + * DB에 저장하는 슬레이브 Step을 생성한다. + * 지정된 예외에 대해 재시도 및 스킵 처리를 통해 장애 허용 처리를 수행한다. + * + * @param reader 임베딩 대상 뉴스 기사 데이터를 페이지 단위로 읽어오는 Reader + * @param processor 뉴스 요약을 임베딩 벡터로 변환하는 Processor + * @param writer 임베딩된 결과를 DB에 저장하는 Writer + * @return 슬레이브 Step + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Step articleEmbeddingSlaveStep( + ArticleEmbeddingPageReader reader, + ArticleEmbeddingProcessor processor, + ArticleEmbeddingWriter writer) { + + return new StepBuilder("articleEmbeddingSlaveStep", jobRepository) + .chunk(chunkSize, transactionManager) + .reader(reader) + .processor(processor) + .writer(writer) + .faultTolerant() + .retry(EmbeddingException.class) + .retryLimit(retryLimit) + .skip(EmbeddingException.class) + .skip(JpaConvertorException.class) + .skipLimit(skipLimit) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/EmbeddingException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/EmbeddingException.java similarity index 84% rename from src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/EmbeddingException.java rename to src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/EmbeddingException.java index 0160dcc..4aff840 100644 --- 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/EmbeddingException.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/EmbeddingException.java @@ -1,4 +1,4 @@ -package com.likelion.backendplus4.talkpick.batch.embedding.exception; +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception; import com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/error/EmbeddingErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/error/EmbeddingErrorCode.java similarity index 85% rename from src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/error/EmbeddingErrorCode.java rename to src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/error/EmbeddingErrorCode.java index 55774fa..ed8d00e 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/exception/error/EmbeddingErrorCode.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/error/EmbeddingErrorCode.java @@ -1,4 +1,4 @@ -package com.likelion.backendplus4.talkpick.batch.embedding.exception.error; +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.error; import org.springframework.http.HttpStatus; @@ -28,13 +28,13 @@ * 002: 두 번째 오류 * 003: 세 번째 오류, 등등 * - * @modified 2025-05-09 * @since 2025-05-09 */ @RequiredArgsConstructor public enum EmbeddingErrorCode implements ErrorCode { MODEL_CREATION_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 440001, "임베딩 모델 생성 실패"), - API_CALL_ERROR(HttpStatus.SERVICE_UNAVAILABLE, 440002, 
"임베딩 API 호출 실패"); + API_CALL_ERROR(HttpStatus.SERVICE_UNAVAILABLE, 440002, "임베딩 API 호출 실패"), + ITEM_NOT_FOUND(HttpStatus.SERVICE_UNAVAILABLE, 450001, "임베딩할 아이템이 없습니다"); private final HttpStatus status; private final int code; diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/partitioner/ArticleEmbeddingPartitioner.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/partitioner/ArticleEmbeddingPartitioner.java new file mode 100644 index 0000000..88db515 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/partitioner/ArticleEmbeddingPartitioner.java @@ -0,0 +1,97 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.partitioner; + +import java.util.List; +import java.util.Map; + +import org.springframework.batch.core.partition.support.Partitioner; +import org.springframework.batch.item.ExecutionContext; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.EmbeddingException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.error.EmbeddingErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.IdRangePartitionCalculator; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.PartitionMapBuilder; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.dto.ArticleIdRange; + +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * {@link Partitioner} 구현체로, 전체 페이지 수(totalPages)를 기준으로 + * 병렬 처리할 수 있도록 페이지 범위를 gridSize만큼 균등하게 분할한다. + * + *

Spring Batch에서 멀티스레드로 작업을 병렬 처리할 때 사용되며, + * 각 ExecutionContext에는 'startPage'와 'endPage'가 설정된다. + * + * @since 2025-05-17 + */ +@Slf4j +@Component +@RequiredArgsConstructor +public class ArticleEmbeddingPartitioner implements Partitioner { + private static final String QUERY_GET_MIN_ID = """ + SELECT MIN(a.id) FROM ArticleEntity a WHERE a.summary IS NOT NULL AND a.summaryVector IS NULL + """; + private static final String QUERY_GET_MAX_ID = """ + SELECT MAX(a.id) FROM ArticleEntity a WHERE a.summary IS NOT NULL AND a.summaryVector IS NULL + """; + + private final IdRangePartitionCalculator calculator; + + @PersistenceContext + private final EntityManager entityManager; + + /** + * ID 범위를 기준으로 gridSize만큼 파티션을 분할하여 반환한다. + * + * @param gridSize 생성할 파티션 수 + * @return 각 파티션의 ExecutionContext를 담은 맵 + * @author 함예정 + * @since 2025-05-18 + */ + @Override + public Map partition(int gridSize) { + log.info("Partitioning article embedding partitioner with gridSize: {}", gridSize); + Long minId = createQuery(QUERY_GET_MIN_ID); + Long maxId = createQuery(QUERY_GET_MAX_ID); + + throwIfInvalidIdRange(minId, maxId); + List ranges = calculator.calculate(minId, maxId, gridSize); + return PartitionMapBuilder.build(ranges); + } + + /** + * 주어진 JPQL 쿼리를 실행하여 Long 타입 결과를 조회한다. + * + * @param query 실행할 JPQL 쿼리 문자열 + * @return 쿼리 결과 값 + * @author 함예정 + * @since 2025-05-17 + */ + private Long createQuery(String query) { + return entityManager.createQuery( + query, + Long.class + ).getSingleResult(); + } + + /** + * 조회된 ID 범위가 유효하지 않을 경우 예외를 발생시킨다. + * + * minId 또는 maxId가 null이거나, minId가 maxId보다 큰 경우 + * {@link EmbeddingException}을 {@link EmbeddingErrorCode#ITEM_NOT_FOUND}와 함께 발생시킨다. 
+ * + * @param minId ID 범위의 최소값 + * @param maxId ID 범위의 최대값 + * @throws EmbeddingException 유효하지 않은 ID 범위일 경우 + * @author 함예정 + * @since 2025-05-18 + */ + private void throwIfInvalidIdRange(Long minId, Long maxId) { + if (minId == null || maxId == null || minId > maxId) { + throw new EmbeddingException(EmbeddingErrorCode.ITEM_NOT_FOUND); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/OpenAIEmbeddingAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/processor/ArticleEmbeddingProcessor.java similarity index 52% rename from src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/OpenAIEmbeddingAdapter.java rename to src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/processor/ArticleEmbeddingProcessor.java index 7ce7aff..5255875 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/embedding/OpenAIEmbeddingAdapter.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/processor/ArticleEmbeddingProcessor.java @@ -1,4 +1,4 @@ -package com.likelion.backendplus4.talkpick.batch.embedding; +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.processor; import java.util.List; @@ -8,43 +8,63 @@ import org.springframework.ai.openai.OpenAiEmbeddingOptions; import org.springframework.ai.openai.api.OpenAiApi; import org.springframework.ai.retry.RetryUtils; +import org.springframework.batch.item.ItemProcessor; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; -import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; -import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogMethodValues; -import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; -import 
com.likelion.backendplus4.talkpick.batch.embedding.exception.EmbeddingException; -import com.likelion.backendplus4.talkpick.batch.embedding.exception.error.EmbeddingErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.EmbeddingException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.error.EmbeddingErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +import lombok.extern.slf4j.Slf4j; /** - * OpenAI API를 사용하여 텍스트 임베딩을 생성하는 어댑터 구현체 + * 뉴스 기사 요약(summary)을 기반으로 임베딩 벡터를 생성하고 + * 해당 벡터를 ArticleEntity에 설정하는 ItemProcessor 구현체. + * Spring Batch의 처리 단계에서 사용된다. * - * @since 2025-05-11 + * @since 2025-05-17 */ @Component -public class OpenAIEmbeddingAdapter implements EmbeddingPort { +@Slf4j +public class ArticleEmbeddingProcessor implements ItemProcessor { private final OpenAiApi openAiApi; private final String embeddingModelName; - public OpenAIEmbeddingAdapter(OpenAiApi openAiApi, + public ArticleEmbeddingProcessor(OpenAiApi openAiApi, @Value("${spring.ai.openai.embedding-model}") String embeddingModelName) { this.openAiApi = openAiApi; this.embeddingModelName = embeddingModelName; } /** - * 주어진 텍스트에 대한 임베딩 벡터를 반환한다. + * ArticleEntity의 summary 필드를 기반으로 임베딩 벡터를 생성하고, + * 해당 벡터를 summaryVector 필드에 설정하여 반환한다. 
* - * @param text 입력 텍스트 - * @return 텍스트 임베딩 벡터 배열 - * @since 2025-05-11 + * @param item 임베딩할 summary를 가진 ArticleEntity + * @return summaryVector가 설정된 ArticleEntity + * @author 함예정 + * @since 2025-05-17 */ - @EntryExitLog - @LogMethodValues - @TimeTracker @Override - public float[] getEmbedding(String text) { + public ArticleEntity process(ArticleEntity item) { + log.info("뉴스 임베딩: id = {}, guid = {}, Thread = {}", + item.getId(), item.getGuid(), Thread.currentThread().getName()); + String newsContent = item.getSummary(); + float[] vector = getEmbedding(newsContent); + return item.changeSummaryVector(vector); + } + + /** + * 주어진 텍스트에 대해 임베딩 벡터(float 배열)를 생성한다. + * 내부적으로 OpenAI 임베딩 모델을 생성하고 실행한다. + * + * @param text 임베딩할 입력 텍스트 + * @return 텍스트에 대한 임베딩 벡터 + * @author 정안식 + * @since 2025-05-11 + */ + private float[] getEmbedding(String text) { OpenAiEmbeddingModel model = createModel(); return executeEmbedding(model, text); } diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/reader/ArticleEmbeddingPageReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/reader/ArticleEmbeddingPageReader.java new file mode 100644 index 0000000..ea00f95 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/reader/ArticleEmbeddingPageReader.java @@ -0,0 +1,42 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.reader; + +import java.util.HashMap; +import java.util.Map; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.batch.item.database.JpaPagingItemReader; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +import 
jakarta.persistence.EntityManagerFactory; +import lombok.extern.slf4j.Slf4j; + +@Component +@StepScope +@Slf4j +public class ArticleEmbeddingPageReader extends JpaPagingItemReader { + private static final String JPQL = """ + SELECT a + FROM ArticleEntity a + WHERE a.summaryVector IS NULL + AND a.summary IS NOT NULL + AND a.id BETWEEN :minId AND :maxId + """; + public ArticleEmbeddingPageReader( + EntityManagerFactory entityManagerFactory, + @Value("#{stepExecutionContext[minId]}") Long minId, + @Value("#{stepExecutionContext[maxId]}") Long maxId) { + + this.setName("articleEmbeddingReader-" + minId + "-" + maxId); + this.setEntityManagerFactory(entityManagerFactory); + this.setQueryString(JPQL); + Map params = new HashMap<>(); + params.put("minId", minId); + params.put("maxId", maxId); + this.setParameterValues(params); + this.setSaveState(false); + log.info("Initialized reader for ID range {} ~ {}", minId, maxId); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/writer/ArticleEmbeddingWriter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/writer/ArticleEmbeddingWriter.java new file mode 100644 index 0000000..484deca --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/writer/ArticleEmbeddingWriter.java @@ -0,0 +1,21 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.writer; + +import org.springframework.batch.item.Chunk; +import org.springframework.batch.item.ItemWriter; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; + +import lombok.RequiredArgsConstructor; + +@Component +@RequiredArgsConstructor +public 
class ArticleEmbeddingWriter implements ItemWriter { + private final NewsInfoJpaRepository newsInfoJpaRepository; + + @Override + public void write(Chunk chunk) { + newsInfoJpaRepository.saveAll(chunk); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingJobExecutor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingJobExecutor.java new file mode 100644 index 0000000..763ebf8 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingJobExecutor.java @@ -0,0 +1,76 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.quartz.config; + +import org.quartz.DisallowConcurrentExecution; +import org.quartz.JobExecutionContext; +import org.springframework.batch.core.Job; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.launch.JobLauncher; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.BatchJobExceptionTranslator; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * Quartz에 의해 트리거되는 Spring Batch Job 실행 클래스. + * JobLauncher를 통해 {@code rssJob}을 수동 실행하며, 매 실행 시 고유한 JobParameters를 생성하여 중복 실행을 방지한다. + * + * - @DisallowConcurrentExecution: 이전 실행이 끝나기 전에는 새로운 실행이 중첩되지 않도록 제한 + * - JobParameters에 timestamp를 포함시켜 매번 다른 인스턴스로 실행되도록 설정 + * + * 이 클래스는 단순한 실행자 역할만 수행한다. 
+ * + * @since 2025-05-17 + */ +@Component +@Slf4j +@DisallowConcurrentExecution +@RequiredArgsConstructor +public class ArticleEmbeddingJobExecutor implements org.quartz.Job { + private final JobLauncher jobLauncher; + private final Job summaryJob; + private final Job articleEmbeddingJob; + private final BatchJobExceptionTranslator batchJobExceptionTranslator; + + /** + * Quartz 트리거에 의해 호출되는 메서드. + * 내부적으로 Spring Batch Job을 실행하는 로직을 위임한다. + * + * @param jobExecutionContext Quartz 실행 컨텍스트 + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public void execute(JobExecutionContext jobExecutionContext) { + startSpringBatchJob(); + } + + /** + * Spring Batch Job을 JobLauncher를 통해 실행한다. + * 각 실행마다 timestamp 파라미터를 부여하여 중복 실행 방지. + * 예외 발생 시 {@link ArticleCollectorException}으로 변환하여 처리한다. + * + * @author 함예정 + * @since 2025-05-17 + */ + private void startSpringBatchJob() { + JobParameters params = new JobParametersBuilder() + .addLong("timestamp", System.currentTimeMillis()) + .toJobParameters(); + + try { + log.info("Quartz Job 실행 - 요약"); + jobLauncher.run(summaryJob, params); + + log.info("Quartz Job 실행 - Embedding"); + jobLauncher.run(articleEmbeddingJob, params); + } catch (Exception e) { + ArticleCollectorErrorCode exceptionCode = batchJobExceptionTranslator.translate(e); + throw new ArticleCollectorException(exceptionCode); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzJobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzJobConfig.java new file mode 100644 index 0000000..f380509 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzJobConfig.java @@ -0,0 +1,40 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.quartz.config; + +import 
org.quartz.JobBuilder; +import org.quartz.JobDetail; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +/** + * 수집된 뉴스를 요약하고 임베딩을 계산하는 스케줄 작업 + * application.yml의 spring.quartz.article-embedding.cron 속성으로 실행 주기 설정 + * @since 2025-05-17 + */ +@Slf4j +@Configuration +public class ArticleEmbeddingQuartzJobConfig { + @Getter + private static final String ARTICLE_EMBEDDING_JOB_DETAIL_NAME = "articleEmbeddingJobDetail"; + + /** + * RSS 수집 Quartz JobDetail 빈 등록. + * Job 클래스는 {@link ArticleEmbeddingJobExecutor}이며 다음과 같은 설정을 포함한다: + * - withIdentity("rssBatchJob"): Scheduler 내에서 이 Job을 고유하게 식별하기 위한 이름 지정 + * - storeDurably(): Trigger가 없더라도 Scheduler에 등록된 상태로 유지되도록 설정 + * + * @return RSS 배치 작업용 JobDetail 객체 + * @author 함예정 + * @since 2025-05-17 + */ + @Bean(ARTICLE_EMBEDDING_JOB_DETAIL_NAME) + public JobDetail articleEmbeddingJobDetail() { + return JobBuilder.newJob(ArticleEmbeddingJobExecutor.class) + .withIdentity(ARTICLE_EMBEDDING_JOB_DETAIL_NAME) + .storeDurably() + .build(); + } + +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzTriggerConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzTriggerConfig.java new file mode 100644 index 0000000..1a1e0f9 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzTriggerConfig.java @@ -0,0 +1,57 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.quartz.config; + +import org.quartz.CronScheduleBuilder; +import org.quartz.JobDetail; +import org.quartz.Trigger; +import org.quartz.TriggerBuilder; +import org.springframework.beans.factory.annotation.Qualifier; +import 
org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import lombok.extern.slf4j.Slf4j; + +@Configuration +@Slf4j +public class ArticleEmbeddingQuartzTriggerConfig { + private final String cronExpression; + private final JobDetail articleEmbeddingJobDetail; + private final String articleEmbeddingJobDetailName = + ArticleEmbeddingQuartzJobConfig.getARTICLE_EMBEDDING_JOB_DETAIL_NAME(); + + /** + * 생성자 주입을 통해 Cron 표현식을 설정한다. + * + * @param cronExpression RSS 배치 실행 주기를 정의하는 Cron 표현식 + * application.yml에서 spring.quartz.article-embedding.cron 값을 로드 합니다. + * @author 함예정 + * @since 2025-05-17 + */ + public ArticleEmbeddingQuartzTriggerConfig( + @Value("${spring.quartz.article-embedding.cron}") String cronExpression, + @Qualifier("articleEmbeddingJobDetail") + JobDetail articleEmbeddingJobDetail) { + this.cronExpression = cronExpression; + this.articleEmbeddingJobDetail = articleEmbeddingJobDetail; + } + + /** + * 내용 임베딩 작업을 위한 Quartz Trigger 빈 등록. 
+ * - forJob: 이 Trigger 가 어떤 Quartz Job 과 연관되어 실행될지를 지정 + * - withIdentity: Scheduler 내에서 이 Trigger 를 고유하게 식별하기 위한 이름 지정 + * - withSchedule: Cron 표현식을 사용하여 실행 주기 설정 + * + * @return RSS 배치 작업용 Trigger 객체 + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Trigger articleEmbeddingQuartzTrigger() { + log.info("Quartz Trigger: " + articleEmbeddingJobDetailName); + return TriggerBuilder.newTrigger() + .forJob(articleEmbeddingJobDetail) + .withIdentity(articleEmbeddingJobDetailName + "trigger") + .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression)) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/FloatArrayToJsonConverter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/FloatArrayToJsonConverter.java new file mode 100644 index 0000000..a3d7b58 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/FloatArrayToJsonConverter.java @@ -0,0 +1,100 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.exception.JpaConvertorException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.exception.error.JpaConvertorErrorCode; + +import jakarta.persistence.AttributeConverter; +import jakarta.persistence.Converter; + +/** + * {@link AttributeConverter} 구현체로, float 배열(float[])을 JSON 문자열로 직렬화하거나 + * JSON 문자열을 float 배열로 역직렬화하여 MySQL JSON 타입 컬럼과 매핑한다. + * + *

직렬화/역직렬화 과정에서 오류가 발생하면 {@link JpaConvertorException}을 발생시키며, + * 오류 유형은 {@link JpaConvertorErrorCode#JSON_CONVERT_ERROR}로 정의된다. + * + *

MySQL에는 배열 타입이 없으므로, 배열 데이터를 JSON 형태로 저장하고 읽어오는 데 유용하다. + * + * @since 2025-05-17 + */ +@Converter +public class FloatArrayToJsonConverter implements AttributeConverter { + + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** + * float 배열을 JSON 문자열로 변환하여 DB에 저장한다. + * + * @param attribute float 배열 + * @return JSON 문자열 + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public String convertToDatabaseColumn(float[] attribute) { + if (attribute == null || attribute.length == 0) { + return null; + } + return toStringFromFloatArray(attribute); + } + + /** + * DB에서 조회된 JSON 문자열을 float 배열로 변환하여 엔티티에 주입한다. + * + * @param dbData DB에서 조회된 JSON 문자열 + * @return float 배열 + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public float[] convertToEntityAttribute(String dbData) { + if (isNullDbData(dbData)) { + return new float[0]; + } + return toFloatArrayFromString(dbData); + } + + /** + * dbData가 null 또는 빈 문자열인지 확인한다. + * + * @return boolean + * @author 함예정 + * @since 2025-05-17 + */ + private boolean isNullDbData(String dbData) { + return dbData == null || dbData.isEmpty(); + } + + /** + * float 배열을 JSON 문자열로 직렬화한다. + * + * @return Json 문자열 + * @author 함예정 + * @since 2025-05-17 + */ + private String toStringFromFloatArray(float[] attribute) { + try { + return objectMapper.writeValueAsString(attribute); + } catch (JsonProcessingException e) { + throw new JpaConvertorException(JpaConvertorErrorCode.JSON_CONVERT_ERROR, e); + } + } + + /** + * JSON 문자열을 float 배열로 역직렬화한다. 
+ * + * @return float 배열 + * @author 함예정 + * @since 2025-05-17 + */ + private float[] toFloatArrayFromString(String dbData) { + try { + return objectMapper.readValue(dbData, float[].class); + } catch (JsonProcessingException e) { + throw new JpaConvertorException(JpaConvertorErrorCode.JSON_CONVERT_ERROR, e); + } + } + +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/JpaConvertorException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/JpaConvertorException.java new file mode 100644 index 0000000..098f1ae --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/JpaConvertorException.java @@ -0,0 +1,31 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.exception; + +import com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +/** + * JPA AttributeConverter에서 변환 중 발생한 예외를 나타내는 커스텀 예외 클래스. + * + *

일반적으로 JSON 직렬화/역직렬화 중 오류가 발생했을 때 사용되며, + * {@link CustomException}을 상속하고 {@link ErrorCode}를 통해 상세 오류 정보를 제공한다. + * + * @since 2025-05-17 + */ +public class JpaConvertorException extends CustomException { + private final ErrorCode errorCode; + + public JpaConvertorException(ErrorCode errorCode) { + super(errorCode); + this.errorCode = errorCode; + } + + public JpaConvertorException(ErrorCode errorCode, Throwable cause) { + super(errorCode, cause); + this.errorCode = errorCode; + } + + @Override + public ErrorCode getErrorCode() { + return errorCode; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/error/JpaConvertorErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/error/JpaConvertorErrorCode.java new file mode 100644 index 0000000..25b9bb8 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/error/JpaConvertorErrorCode.java @@ -0,0 +1,55 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.exception.error; + +import org.springframework.http.HttpStatus; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +import lombok.RequiredArgsConstructor; + +/** + * 에러 코드 인터페이스 각 에러 항목에 대한 HTTP 상태, 에러 번호, 메시지를 제공한다. 
+ * A[BB][CCC] + * A (1자리) : 에러 심각도 (1~5) + * 1: 클라이언트 오류 + * 2: 인증 관련 오류 + * 3: 사용자 관련 오류 + * 4: 서버 오류 + * 5: 시스템 오류 + * + * BB (2자리) : 도메인 코드 + * 10: 사용자 관련 (ex: USER_NOT_FOUND) + * 20: 인증 관련 (ex: AUTHORIZATION_FAILED) + * 30: DB 관련 오류 (ex: DB_CONNECTION_FAILED) + * 40: API 관련 오류 (ex: API_TIMEOUT) + * 50: 시스템 오류 (ex: INTERNAL_SERVER_ERROR) + * + * CCC (3자리) : 세부 오류 순번 + * 001: 첫 번째 오류 + * 002: 두 번째 오류 + * 003: 세 번째 오류, 등등 + * + * @since 2025-05-17 + */ +@RequiredArgsConstructor +public enum JpaConvertorErrorCode implements ErrorCode { + JSON_CONVERT_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 440003, "JSON 컨버터 타입 변환 실패"); + + private final HttpStatus status; + private final int code; + private final String message; + + @Override + public HttpStatus httpStatus() { + return status; + } + + @Override + public int codeNumber() { + return code; + } + + @Override + public String message() { + return message; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java index a8dbfaa..19b3e16 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java @@ -1,10 +1,25 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity; -import jakarta.persistence.*; -import lombok.*; - import java.time.LocalDateTime; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.FloatArrayToJsonConverter; + +import jakarta.persistence.Column; +import jakarta.persistence.Convert; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import 
jakarta.persistence.PrePersist; +import jakarta.persistence.Table; +import jakarta.persistence.UniqueConstraint; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import lombok.ToString; /** * RSS 피드를 수집 객체 @@ -26,52 +41,61 @@ @EqualsAndHashCode(of = "id") public class ArticleEntity { - @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - private Long id; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Setter + @Column(nullable = false) + private String title; + + @Column(nullable = false, unique = true) + private String link; - @Setter - @Column(nullable = false) - private String title; + @Setter + @Column(name = "pub_date") + private LocalDateTime pubDate; - @Column(nullable = false, unique = true) - private String link; + @Column + private String category; - @Setter - @Column(name = "pub_date") - private LocalDateTime pubDate; + @Column + private String guid; - @Column - private String category; + @Setter + @Column(columnDefinition = "TEXT") + private String description; - @Column - private String guid; + @Setter + @Column(name = "summary", columnDefinition = "TEXT") + private String summary; - @Setter - @Column(columnDefinition = "TEXT") - private String description; + @Setter + @Column(name = "image_url") + private String imageUrl; - @Setter - @Column(name = "summary", columnDefinition = "TEXT") - private String summary; + @Column(name = "created_at") + private LocalDateTime createdAt; - @Setter - @Column(name = "image_url") - private String imageUrl; + @Convert(converter = FloatArrayToJsonConverter.class) + @Column(name = "summary_vector", columnDefinition = "JSON") + private float[] summaryVector; - @Column(name = "created_at") - private LocalDateTime createdAt; + public ArticleEntity changeSummaryVector(float[] vector) { + summaryVector = vector; + return this; + } - @PrePersist - protected 
void onCreate() { - createdAt = LocalDateTime.now(); - } + @PrePersist + protected void onCreate() { + createdAt = LocalDateTime.now(); + } - public String getDescription(){ - return description != null ? description : ""; - } + public String getDescription() { + return description != null ? description : ""; + } - public String getSummary() { - return summary != null ? summary : ""; - } + public String getSummary() { + return summary != null ? summary : ""; + } } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java index 00432da..20bbe9e 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java @@ -1,25 +1,25 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository; -import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import java.time.LocalDateTime; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; import org.springframework.stereotype.Repository; -import java.time.LocalDateTime; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; @Repository public interface NewsInfoJpaRepository extends JpaRepository { - boolean existsByLink(String link); + boolean existsByLink(String link); - /** - * 특정 언론사의 가장 최신 기사 발행일 조회 - * - * @param guidPrefix 언론사 GUID 접두어 (예: "KM", "DA", "KH") - * @return 가장 최신 발행일 - */ - @Query("SELECT MAX(a.pubDate) FROM ArticleEntity a WHERE a.guid 
LIKE CONCAT(:guidPrefix, '%')") - LocalDateTime findLatestPubDateByGuidPrefix(@Param("guidPrefix") String guidPrefix); + /** + * 특정 언론사의 가장 최신 기사 발행일 조회 + * + * @param guidPrefix 언론사 GUID 접두어 (예: "KM", "DA", "KH") + * @return 가장 최신 발행일 + */ + @Query("SELECT MAX(a.pubDate) FROM ArticleEntity a WHERE a.guid LIKE CONCAT(:guidPrefix, '%')") + LocalDateTime findLatestPubDateByGuidPrefix(@Param("guidPrefix") String guidPrefix); } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryJobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryJobConfig.java new file mode 100644 index 0000000..e1ffc58 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryJobConfig.java @@ -0,0 +1,47 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.config; + +import org.springframework.batch.core.Job; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.job.builder.JobBuilder; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * Spring Batch Job 설정 클래스.

+ * 수집된 기사를 OpenAI를 활용해 뉴스 내용을 요약하는 + * Batch Job 을 정의하며, 파티셔닝된 Step 을 시작 단계로 구성한다.

+ * + * 이 잡은 {@code summaryJob}이라는 이름으로 정의되며, + * {@link Step} 객체는 외부에서 주입받아 사용한다.

+ * + * 해당 Job 은 Quartz 또는 Spring Scheduler 를 통해 주기적으로 실행될 수 있다.

+ * + * @since 2025-05-17 + */ +@Configuration +public class SummaryJobConfig { + private final String jobName = "summaryJob"; + private final JobRepository jobRepository; + private final Step articleSummaryStep; + + public SummaryJobConfig(JobRepository jobRepository, Step articleSummaryStep) { + this.jobRepository = jobRepository; + this.articleSummaryStep = articleSummaryStep; + } + + /** + * 뉴스 내용을 요약하는 Spring Batch Job Bean을 생성한다. + * 파티셔닝 Step 을 실행하도록 구성한다. + * + * @return 뉴스 요약 Batch Job + * @author 함예정 + * @since 2025-05-10 + */ + @Bean + public Job summaryJob() { + return new JobBuilder(jobName, jobRepository) + .start(articleSummaryStep) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryStepConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryStepConfig.java new file mode 100644 index 0000000..981e0af --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryStepConfig.java @@ -0,0 +1,98 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.config; + +import org.springframework.batch.core.Step; +import org.springframework.batch.core.partition.support.Partitioner; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.batch.core.step.builder.StepBuilder; +import org.springframework.batch.item.ItemWriter; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.task.TaskExecutor; +import org.springframework.transaction.PlatformTransactionManager; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.partitioner.ArticleSummaryPartitioner; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.processor.ArticleSummaryProcessor; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.reader.ArticleSummaryPageReader; + +/** + * 배치 작업에서 기사 요약 처리를 위한 파티셔닝 및 슬레이브 Step을 설정하는 구성 클래스. + * + * @since 2025-05-17 + */ +@Configuration +public class SummaryStepConfig { + private static final String partitionedStepName = "articleSummaryStep"; + private final String executorName = "normalExecutor"; + private final String summaryStepName = "articleSummarySlaveStep"; + private final int gridSize = 5; + private final int chunkSize = 100; + private final int retryLimit = 3; + private final int skipLimit = 100; + private final JobRepository jobRepository; + private final Partitioner partitioner; + private final PlatformTransactionManager transactionManager; + private final TaskExecutor taskExecutor; + private final ArticleSummaryProcessor summaryProcessor; + private final ItemWriter writer; + + public SummaryStepConfig(JobRepository jobRepository, + ArticleSummaryPartitioner partitioner, + PlatformTransactionManager platformTransactionManager, + @Qualifier(executorName) + TaskExecutor taskExecutor, + ArticleSummaryProcessor summaryProcessor, + ItemWriter articleSummaryWriter) { + this.jobRepository = jobRepository; + this.partitioner = partitioner; + this.transactionManager = platformTransactionManager; + this.taskExecutor = taskExecutor; + this.summaryProcessor = summaryProcessor; + this.writer = articleSummaryWriter; + } + + /** + * 파티셔닝된 마스터 Step을 정의한다. + * 각 파티션은 {@code articleSummarySlaveStep}을 실행하며, 병렬 처리를 위해 TaskExecutor가 사용된다. 
+ * + * @param articleSummarySlaveStep 파티션마다 실행될 슬레이브 Step + * @return 마스터 Step Bean + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Step articleSummaryStep(Step articleSummarySlaveStep) { + return new StepBuilder(partitionedStepName, jobRepository) + .partitioner(partitionedStepName, partitioner) + .step(articleSummarySlaveStep) + .taskExecutor(taskExecutor) + .gridSize(gridSize) + .build(); + } + + /** + * 기사 데이터를 요약 처리하는 슬레이브 Step을 정의한다. + * 청크 기반으로 데이터를 읽고, 처리하고, 쓰며, 오류에 대해 재시도 및 건너뛰기를 허용한다. + * + * @param reader 기사 데이터를 읽는 Reader + * @return 슬레이브 Step Bean + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Step articleSummarySlaveStep(ArticleSummaryPageReader reader) { + return new StepBuilder(summaryStepName, jobRepository) + .chunk(chunkSize, transactionManager) + .reader(reader) + .processor(summaryProcessor) + .writer(writer) + .faultTolerant() + .retry(ArticleCollectorException.class) + .retryLimit(retryLimit) + .skip(ArticleCollectorException.class) + .skipLimit(skipLimit) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/ArticleSummaryException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/ArticleSummaryException.java new file mode 100644 index 0000000..bfb7389 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/ArticleSummaryException.java @@ -0,0 +1,23 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.exception; + +import com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +public class ArticleSummaryException extends CustomException { + private final ErrorCode errorCode; + + public ArticleSummaryException(ErrorCode errorCode) { + super(errorCode); + 
this.errorCode = errorCode; + } + + public ArticleSummaryException(ErrorCode errorCode, Throwable cause) { + super(errorCode, cause); + this.errorCode = errorCode; + } + + @Override + public ErrorCode getErrorCode() { + return errorCode; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/error/ArticleSummaryErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/error/ArticleSummaryErrorCode.java new file mode 100644 index 0000000..23456a4 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/error/ArticleSummaryErrorCode.java @@ -0,0 +1,55 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.exception.error; + +import org.springframework.http.HttpStatus; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +import lombok.RequiredArgsConstructor; + +/** + * 에러 코드 인터페이스 각 에러 항목에 대한 HTTP 상태, 에러 번호, 메시지를 제공한다. 
+ * A[BB][CCC] + * A (1자리) : 에러 심각도 (1~5) + * 1: 클라이언트 오류 + * 2: 인증 관련 오류 + * 3: 사용자 관련 오류 + * 4: 서버 오류 + * 5: 시스템 오류 + * + * BB (2자리) : 도메인 코드 + * 10: 사용자 관련 (ex: USER_NOT_FOUND) + * 20: 인증 관련 (ex: AUTHORIZATION_FAILED) + * 30: DB 관련 오류 (ex: DB_CONNECTION_FAILED) + * 40: API 관련 오류 (ex: API_TIMEOUT) + * 50: 시스템 오류 (ex: INTERNAL_SERVER_ERROR) + * + * CCC (3자리) : 세부 오류 순번 + * 001: 첫 번째 오류 + * 002: 두 번째 오류 + * 003: 세 번째 오류, 등등 + * + * @since 2025-05-18 + */ +@RequiredArgsConstructor +public enum ArticleSummaryErrorCode implements ErrorCode { + ITEM_NOT_FOUND(HttpStatus.SERVICE_UNAVAILABLE, 450001, "요약할 뉴스가 없습니다"); + + private final HttpStatus status; + private final int code; + private final String message; + + @Override + public HttpStatus httpStatus() { + return status; + } + + @Override + public int codeNumber() { + return code; + } + + @Override + public String message() { + return message; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/partitioner/ArticleSummaryPartitioner.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/partitioner/ArticleSummaryPartitioner.java new file mode 100644 index 0000000..bc13fd6 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/partitioner/ArticleSummaryPartitioner.java @@ -0,0 +1,111 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.partitioner; + +import java.util.List; +import java.util.Map; + +import org.springframework.batch.core.partition.support.Partitioner; +import org.springframework.batch.item.ExecutionContext; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.EmbeddingException; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.error.EmbeddingErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.exception.ArticleSummaryException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.exception.error.ArticleSummaryErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.IdRangePartitionCalculator; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.PartitionMapBuilder; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.dto.ArticleIdRange; + +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * {@link Partitioner} 구현체로, 전체 페이지 수(totalPages)를 기준으로 + * 병렬 처리할 수 있도록 페이지 범위를 gridSize만큼 균등하게 분할한다. + * + *

Spring Batch에서 멀티스레드로 작업을 병렬 처리할 때 사용되며, + * 각 ExecutionContext에는 'startPage'와 'endPage'가 설정된다. + * + * @since 2025-05-17 + */ +@Component +@Slf4j +@RequiredArgsConstructor +public class ArticleSummaryPartitioner implements Partitioner { + private static final String QUERY_GET_MIN_ID = """ + SELECT MIN(a.id) FROM ArticleEntity a WHERE a.summary IS NULL + """; + private static final String QUERY_GET_MAX_ID = """ + SELECT MAX(a.id) FROM ArticleEntity a WHERE a.summary IS NULL + """; + + private final IdRangePartitionCalculator calculator; + @PersistenceContext + private EntityManager entityManager; + + /** + * ID 범위를 기준으로 데이터를 분할한다. + * Spring Batch에서 마스터 Step이 병렬로 슬레이브 Step을 실행할 수 있도록 파티션을 생성한다. + * + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public Map partition(int gridSize) { + Long minId = createQuery(QUERY_GET_MIN_ID); + Long maxId = createQuery(QUERY_GET_MAX_ID); + + throwIfInvalidIdRange(minId, maxId); + List ranges = calculator.calculate(minId, maxId, gridSize); + return PartitionMapBuilder.build(ranges); + } + + /** + * 주어진 ID 범위가 유효하지 않을 경우 예외를 발생시킨다. + * + * minId 또는 maxId가 null이거나, minId가 maxId보다 큰 경우 + * {@link ArticleSummaryException}을 {@link ArticleSummaryErrorCode#ITEM_NOT_FOUND}와 함께 발생시킨다. + * + * @param minId ID 범위의 최소값 + * @param maxId ID 범위의 최대값 + * @throws ArticleSummaryException 유효하지 않은 ID 범위일 경우 + * + * @author 함예정 + * @since 2025-05-18 + */ + private void throwIfInvalidIdRange(Long minId, Long maxId) { + if (minId == null || maxId == null || minId > maxId) { + throw new ArticleSummaryException(ArticleSummaryErrorCode.ITEM_NOT_FOUND); + } + } + + /** + * 주어진 JPQL 쿼리를 실행하여 단일 Long 값을 반환한다. + * + * @param query 실행할 JPQL 쿼리 문자열 + * @return 조회된 Long 값 + * @author 함예정 + * @since 2025-05-17 + */ + private Long createQuery(String query) { + return entityManager.createQuery( + query, + Long.class + ).getSingleResult(); + } + + /** + * ID 범위가 유효한지 확인한다. 
+ * + * @param minId 조회된 최소 ID + * @param maxId 조회된 최대 ID + * @return 범위가 유효하지 않으면 true 반환 + * @author 함예정 + * @since 2025-05-17 + */ + private boolean isInvalidIdRange(Long minId, Long maxId) { + return minId == null || maxId == null || minId > maxId; + } + +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/processor/ArticleSummaryProcessor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/processor/ArticleSummaryProcessor.java new file mode 100644 index 0000000..0ddd1e8 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/processor/ArticleSummaryProcessor.java @@ -0,0 +1,53 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.processor; + +import org.springframework.ai.chat.client.ChatClient; +import org.springframework.batch.item.ItemProcessor; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * 뉴스 기사를 AI 모델을 통해 요약 처리하는 ItemProcessor 구현체. + * 입력으로 받은 ArticleEntity의 본문을 요약하여 summary 필드에 설정한 후 반환한다. + * + * @since 2025-05-17 + */ +@Component +@RequiredArgsConstructor +@Slf4j +public class ArticleSummaryProcessor implements ItemProcessor { + private final String prompt = "Summarize the following news in 3–5 concise sentences, objectively, in Korean.\n\n news: \n"; + private final ChatClient chatClient; + + /** + * 기사 내용을 AI를 통해 요약하고, 요약 결과를 ArticleEntity에 설정하여 반환한다. 
+ * + * @param item 요약할 뉴스 기사 엔티티 + * @return 요약이 포함된 뉴스 기사 엔티티 + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public ArticleEntity process(ArticleEntity item) { + log.info("뉴스 요약: id = {}, guid = {}", item.getId(), item.getGuid()); + String newsContent = item.getDescription(); + String summary = getSummary(newsContent); + item.setSummary(summary); + return item; + } + + /** + * 주어진 뉴스 기사 본문을 AI 모델을 통해 요약한다. + * + * @param text 뉴스 기사 본문 + * @return 요약된 텍스트 + * @author 함예정 + * @since 2025-05-17 + */ + private String getSummary(String text) { + return chatClient.prompt().user(prompt + text).call().content(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/reader/ArticleSummaryPageReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/reader/ArticleSummaryPageReader.java new file mode 100644 index 0000000..57c9cb3 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/reader/ArticleSummaryPageReader.java @@ -0,0 +1,57 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.reader; + +import java.util.HashMap; +import java.util.Map; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.batch.item.database.JpaPagingItemReader; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +import jakarta.persistence.EntityManagerFactory; +import lombok.extern.slf4j.Slf4j; + +/** + * 요약되지 않은 뉴스 기사 데이터를 ID 범위 기반으로 페이징 조회하는 JPA ItemReader. + * 파티셔닝된 슬레이브 Step에서 각 파티션이 담당할 ID 구간의 데이터를 읽기 위해 사용된다. 
+ * + * @since 2025-05-17 + */ +@Component +@Slf4j +@StepScope +public class ArticleSummaryPageReader extends JpaPagingItemReader { + private static final String JPQL = """ + SELECT a + FROM ArticleEntity a + WHERE a.summary IS NULL + AND a.id BETWEEN :minId AND :maxId + """; + + /** + * 지정된 ID 범위에 해당하는 기사 데이터를 페이징 방식으로 읽어오는 Reader를 초기화한다. + * + * @param entityManagerFactory JPA EntityManagerFactory + * @param minId 파티션에서 처리할 최소 ID (StepExecutionContext에서 주입됨) + * @param maxId 파티션에서 처리할 최대 ID (StepExecutionContext에서 주입됨) + * @author 함예정 + * @since 2025-05-17 + */ + public ArticleSummaryPageReader( + EntityManagerFactory entityManagerFactory, + @Value("#{stepExecutionContext[minId]}") Long minId, + @Value("#{stepExecutionContext[maxId]}") Long maxId) { + + this.setName("articleSummaryReader-" + minId + "-" + maxId); + this.setEntityManagerFactory(entityManagerFactory); + this.setQueryString(JPQL); + Map params = new HashMap<>(); + params.put("minId", minId); + params.put("maxId", maxId); + this.setParameterValues(params); + this.setPageSize(100); + this.setSaveState(false); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/writer/ArticleSummaryWriter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/writer/ArticleSummaryWriter.java new file mode 100644 index 0000000..61851b3 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/writer/ArticleSummaryWriter.java @@ -0,0 +1,34 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.writer; + +import org.springframework.batch.item.Chunk; +import org.springframework.batch.item.ItemWriter; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; + +import lombok.RequiredArgsConstructor; + +/** + * 처리된 뉴스 기사 요약 데이터를 DB에 저장하는 ItemWriter 구현체. + * + * @since 2025-05-17 + */ +@Component +@RequiredArgsConstructor +public class ArticleSummaryWriter implements ItemWriter { + private final NewsInfoJpaRepository newsInfoJpaRepository; + + /** + * 청크 단위로 받은 기사 요약 데이터를 데이터베이스에 일괄 저장한다. + * + * @param chunk 요약이 완료된 기사 데이터 목록 + * @throws Exception 저장 중 발생할 수 있는 예외 + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public void write(Chunk chunk) throws Exception { + newsInfoJpaRepository.saveAll(chunk); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/IdRangePartitionCalculator.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/IdRangePartitionCalculator.java new file mode 100644 index 0000000..4063697 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/IdRangePartitionCalculator.java @@ -0,0 +1,37 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner; + +import java.util.ArrayList; +import java.util.List; + +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.dto.ArticleIdRange; + +@Component +public class IdRangePartitionCalculator { + + /** + * @param minId 최소 ID + * @param maxId 최대 ID + * @param gridSize 분할 개수 + * @return 각 파티션의 IdRange 리스트 + */ + public List calculate(long minId, long maxId, int gridSize) { + long total = maxId - minId + 1; + long baseSize = total / gridSize; + long remainder = total % gridSize; + + List ranges = new ArrayList<>(gridSize); + long start = minId; + + for (int i = 0; i < gridSize; i++) { + long size = baseSize + (i < remainder ? 
1 : 0); + long end = (i == gridSize - 1) ? maxId : (start + size - 1); + + ranges.add(new ArticleIdRange(start, end)); + start = end + 1; + } + + return ranges; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/PartitionMapBuilder.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/PartitionMapBuilder.java new file mode 100644 index 0000000..3639cbb --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/PartitionMapBuilder.java @@ -0,0 +1,23 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.springframework.batch.item.ExecutionContext; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.dto.ArticleIdRange; + +public class PartitionMapBuilder { + public static Map build(List ranges) { + Map partitions = new LinkedHashMap<>(); + for (int i = 0; i < ranges.size(); i++) { + ArticleIdRange r = ranges.get(i); + ExecutionContext ctx = new ExecutionContext(); + ctx.putLong("minId", r.start()); + ctx.putLong("maxId", r.end()); + partitions.put("partition" + i, ctx); + } + return partitions; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/dto/ArticleIdRange.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/dto/ArticleIdRange.java new file mode 100644 index 0000000..ddc66b2 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/dto/ArticleIdRange.java @@ -0,0 +1,8 @@ +package 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.dto; + +/** + * ID 범위(start, end)를 표현하는 불변 타입 DTO + * + * @since 2025-05-18 + */ +public record ArticleIdRange(long start, long end) {} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index af087ae..13e3221 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -11,6 +11,11 @@ spring: openai: api-key: ${OPENAI_API_KEY} embedding-model: text-embedding-3-small + summary: + model: gpt-4.1-nano + maxCompletionTokens: 800 + temperature: 0.5 + application: name: talkpick-batch datasource: @@ -18,6 +23,12 @@ spring: username: ${MYSQL_USERNAME} password: ${MYSQL_PASSWORD} driver-class-name: com.mysql.cj.jdbc.Driver + hikari: + maximum-pool-size: 20 + minimum-idle: 10 + idle-timeout: 30000 + max-lifetime: 1800000 + connection-timeout: 30000 jpa: database-platform: org.hibernate.dialect.MySQL8Dialect hibernate: @@ -33,11 +44,15 @@ spring: job: enabled: false jdbc: - initialize-schema: never + initialize-schema: embedded quartz: job-store-type: jdbc jdbc: - initialize-schema: never + initialize-schema: embedded + article-collector: + cron: "0 */1 * * * ?" + article-embedding: + cron: "0 */5 * * * ?" log: rolling: @@ -46,15 +61,16 @@ log: pattern: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - [TraceId: %X{traceId:-no-trace}] - %msg%n" max-history: 30 total-size-cap: 10MB + +logging: + level: + org.quartz: DEBUG + file: + name: talkpick-batch.log decorator: datasource: p6spy: enable-logging: false - -article-collector: - quartz: - cron: "0 */1 * * * ?" 
- news: index: name: news_index From 8cca6ad9d281e5958174cbc7b00bd25d6c5e5313 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sun, 18 May 2025 22:00:39 +0900 Subject: [PATCH 29/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20PR=20=EB=B0=8F=20?= =?UTF-8?q?=EC=9D=B4=EC=8A=88=20D-Day=20=EA=B4=80=EB=A6=AC=20Action=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80=20(#75)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📦 Chore: PR 및 이슈 D-Day 관리 Action 추가 * 📦 Chore: 수동 실행 제거 --- .github/workflows/d-day-labeler.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/d-day-labeler.yml diff --git a/.github/workflows/d-day-labeler.yml b/.github/workflows/d-day-labeler.yml new file mode 100644 index 0000000..f4b448f --- /dev/null +++ b/.github/workflows/d-day-labeler.yml @@ -0,0 +1,24 @@ +name: D-day-labeler + +on: + schedule: + - cron: '0 15 * * *' +permissions: + issues: write + +jobs: + issue-d-day-labeler: + runs-on: ubuntu-latest + steps: + - name: Update D-n Labels + uses: yj-circle/issue-d-day-labeler@v1.0.1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + pr-d-day-labeler: + runs-on: [ubuntu-latest] + steps: + - name: Update D-n Labels + uses: naver/d-day-labeler@latest + with: + token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file From a9d5a8609304c45235cc943ca4ee54d8cb1bf341 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Sun, 18 May 2025 22:01:09 +0900 Subject: [PATCH 30/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#76=20=EC=83=89?= =?UTF-8?q?=EC=9D=B8=20=EC=A0=95=EB=B3=B4=20=EC=A0=9C=EA=B3=B5=20=ED=8F=AC?= =?UTF-8?q?=ED=8A=B8=EC=97=90=20=EB=89=B4=EC=8A=A4=20=EC=9A=94=EC=95=BD=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80=20(#77)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ Feature: NewsInfo 도메인 내용 요약 및 벡터 필드 추가 * ✨ Feature: 필드 추가에 따른 mapper 수정 * 📦 Chore: 컴파일 오류 해결을 위한 샘플 코드 수정 --- .../talkpick/batch/index/domain/model/NewsInfo.java | 7 +++++-- 
.../jpa/mapper/ArticleEntityMapper.java | 4 +++- .../batch/sample/index/NewsIndexServiceTestImpl.java | 12 +++++++++--- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java index 9cd3def..386c90b 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java @@ -10,15 +10,18 @@ * 뉴스 정보를 표현하는 도메인 모델 * * @since 2025-05-15 + * @modified 2025-05-18 */ @RequiredArgsConstructor @Getter @Builder -public class NewsInfo{ +public class NewsInfo { private final String newsId; private final String title; private final String content; private final LocalDateTime publishedAt; private final String imageUrl; private final String category; - } + private final String summary; + private final float[] summaryVector; +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java index 6cbb9bc..a6b7193 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java @@ -10,8 +10,10 @@ public static NewsInfo toDomainFromEntity(ArticleEntity articleEntity) { .title(articleEntity.getTitle()) .content(articleEntity.getDescription()) .publishedAt(articleEntity.getPubDate()) - .imageUrl(null) // TODO: 나중에 추가 예정 + .imageUrl(articleEntity.getImageUrl()) .category(articleEntity.getCategory()) + .summary(articleEntity.getSummary()) + .summaryVector(articleEntity.getSummaryVector()) .build(); } } diff 
--git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java index 54470fe..08ecac2 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java @@ -29,7 +29,9 @@ public List fetchAll() { "첫 번째 테스트 뉴스의 내용입니다.", LocalDateTime.of(2025, 5, 14, 10, 0), "https://example.com/image1.jpg", - "테스트" + "테스트", + "요약1", + new float[] {1.0f, 2.5f, 3.3f} ), new NewsInfo( "news-2", @@ -37,7 +39,9 @@ public List fetchAll() { "두 번째 테스트 뉴스의 내용입니다.", LocalDateTime.of(2025, 5, 13, 11, 30), "https://example.com/image2.jpg", - "테스트" + "테스트", + "요약2", + new float[] {1.0f, 2.5f, 3.3f} ), new NewsInfo( "news-3", @@ -45,7 +49,9 @@ public List fetchAll() { "세 번째 테스트 뉴스의 내용입니다.", LocalDateTime.of(2025, 5, 12, 14, 45), "https://example.com/image3.jpg", - "테스트" + "테스트", + "요약3", + new float[] {1.0f, 2.5f, 3.3f} ) ); } From 1fbca66b2d73e57f7cde93283ca253ffa5589e20 Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Mon, 19 May 2025 00:36:16 +0900 Subject: [PATCH 31/36] =?UTF-8?q?=F0=9F=93=A6=20Chore:=20PR=20D-n=EB=A3=B0?= =?UTF-8?q?=20=EC=95=A1=EC=85=98=20=EA=B6=8C=ED=95=9C=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/d-day-labeler.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/d-day-labeler.yml b/.github/workflows/d-day-labeler.yml index f4b448f..5046ba1 100644 --- a/.github/workflows/d-day-labeler.yml +++ b/.github/workflows/d-day-labeler.yml @@ -5,6 +5,8 @@ on: - cron: '0 15 * * *' permissions: issues: write + pull-requests: write + contents: read jobs: issue-d-day-labeler: @@ -21,4 +23,4 @@ jobs: - name: Update D-n Labels uses: naver/d-day-labeler@latest with: - 
token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + token: ${{ secrets.GITHUB_TOKEN }} From ea05358a1236774f4097860fcd88f93120b1c083 Mon Sep 17 00:00:00 2001 From: Atriel <118334518+Atriel1999@users.noreply.github.com> Date: Mon, 19 May 2025 10:48:57 +0900 Subject: [PATCH 32/36] =?UTF-8?q?=E2=9C=A8=20Feature:=20#56=20=EB=89=B4?= =?UTF-8?q?=EC=8A=A4=EB=B3=B8=EB=AC=B8=20=EC=8A=A4=ED=81=AC=EB=9E=98?= =?UTF-8?q?=ED=95=91=EA=B8=B0=EB=8A=A5=20=EA=B0=9C=EB=B0=9C=20(#65)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ Feat: 스크래핑 인터페이스 구현 및 스크래핑 기능해서 관련 Mapper 수정 * ♻️ Refactor: 엔티티 구조 변경 (isSummary -> Summary 대체, imgurl 추가), 관련 Mapper 기능 구현 * 📦 Chore: Jsoup 의존성 추가 (1.20.1) * 🐛 Fix: 변수 입력값, @Override 관련 코드 수정 * 🐛 Fix: 생성자 관련 이슈 수정 * ✨ Feat: 스크래퍼 1차구현 * 🐛 Fix: 아직 구현하지않은 매핑정보 주석처리 * 🐛 Fix: 언론사별 extractUniqueIdFromLink 메서드 예외처리 통일 * ♻️ Refactor: Mapper, Scrapper 코드 구조 수정, 구현 기능 개선 * ♻️Refactor: 스크래핑 부분 리팩토링 및 CSS, SSL인증서 등등 필요없는 데이터 filter 추가 (받는 데이터량 감소) * 📝Docs: 메서드 내 주석제거 * ✨ Feat: 본문 데이터 직렬화 처리 * ♻️Refactor: Rss 데이터 수집 구조 변경 (중복 데이터 전처리 과정 추가), 필요없는 로그 정리 * ♻️ Refactor: 코드 리팩토링, 오타 및 구조수정(책임분리) * ♻️ Refactor: 스크래핑 재시도 로직 추가 및 직렬화 과정 리팩토링 * 🐛 Fix: 경향신문 순환 참조 오류 수정 (템플릿 메소드 패턴 구현 오류) * 🐛 Fix: 임시해결을위한 캐싱 제거 및 근본적 문제 해결 (템플릿메소드 순환참조) * ♻️ Refactor: Mapper 코드 수정 및 리팩토링 * ♻️ Refactor: Mapper 코드 수정 및 리팩토링 * ♻️ Refactor: 스크래퍼 retry처리 제거 및 커스텀예외처리 추가, 코드 리팩토링 (scraper부분) * 🐛 Fix: interface Slf4j 제거 및 직렬화 로깅 삭제 * 📦 Chore: build.gradle 의존성에 hibernation의 spring-boot-starter-validation 의존성 추가, ArticleEntity 객체 검증에 사용 * ♻️ Refactor: 조건문 요다검증식으로 변환 * ♻️ Refactor: ArticleEntity 내부에 Validation 검증 어노테이션추가 * ♻️ Refactor: null처리 메소드로 관심사 분리 * 📝 Docs: 주석 오타 수정 * ♻️ Refactor: Mapper기능에 맞지않는 유틸리티 관련 메서드 관심사 분리 * 📝 Docs: 스크래핑 본문 메소드 author 추가 * ♻️ Refactor: 기존 반복전인 코드패턴, 배열과 스트림API을 사용해 리팩토링 * ♻️ Refactor: 배열과 스트림API를 이용한 반복패턴 제거 * ♻️ Refactor: Stream Api를 활용한 코드 리팩토링 * ♻️ Refactor: if문 람다식으로 통일 * ♻️ Refactor: 한 메소드에 많은기능이 몰려있어서 여러 메소드로 
책임분리 * 🔀 Conflict: ArticleEntity import 충돌해결 * 🐛 Fix: Summary Column 복구 --------- Co-authored-by: Yejeong, Ham --- build.gradle | 1 + .../batch/TalkpickBatchApplication.java | 1 - .../error/ArticleCollectorErrorCode.java | 11 + .../collector/processor/RssFeedReader.java | 150 +++++----- .../support/mapper/AbstractRssMapper.java | 265 ++++++----------- .../mapper/implement/DongaRssMapper.java | 164 +++++++---- .../mapper/implement/KhanRssMapper.java | 221 +++++++------- .../mapper/implement/KmibRssMapper.java | 216 ++++++++++---- .../support/result/ScrapingResult.java | 19 ++ .../support/scraper/ContentScraper.java | 18 +- .../scraper/factory/ScraperFactory.java | 8 +- .../implement/DongaContentScraper.java | 219 ++++++++++---- .../scraper/implement/KhanContentScraper.java | 270 +++++++++++------- .../collector/support/util/HtmlParser.java | 68 +++++ .../{scraper => }/util/HtmlScraperUtils.java | 2 +- .../collector/support/util/ParagraphUtil.java | 34 +++ .../collector/writer/ArticleWriter.java | 141 ++++++++- .../jpa/entity/ArticleEntity.java | 92 +++--- 18 files changed, 1227 insertions(+), 673 deletions(-) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/result/ScrapingResult.java create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java rename src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/{scraper => }/util/HtmlScraperUtils.java (99%) create mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java diff --git a/build.gradle b/build.gradle index 115adc7..170ffb5 100644 --- a/build.gradle +++ b/build.gradle @@ -32,6 +32,7 @@ dependencies { testImplementation 'org.springframework.boot:spring-boot-starter-test' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' + 
implementation 'org.springframework.boot:spring-boot-starter-validation' // Spring-batch implementation 'org.springframework.boot:spring-boot-starter-batch' diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/TalkpickBatchApplication.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/TalkpickBatchApplication.java index d9dec07..5a2a49a 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/TalkpickBatchApplication.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/TalkpickBatchApplication.java @@ -9,5 +9,4 @@ public class TalkpickBatchApplication { public static void main(String[] args) { SpringApplication.run(TalkpickBatchApplication.class, args); } - } diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java index 6d18db4..30f0f58 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java @@ -36,9 +36,20 @@ public enum ArticleCollectorErrorCode implements ErrorCode { FEED_TIMEOUT_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450003, "RSS 피드 로드 중 시간 초과가 발생했습니다."), // Mapper 관련 오류 + RSS_CONTENT_EMPTY(HttpStatus.INTERNAL_SERVER_ERROR, 450012, "RSS 피드 내용이 비어있습니다."), + RSS_PARSING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450013, "RSS 피드 내용 파싱 중 오류가 발생했습니다."), + RSS_IMAGE_MISSING(HttpStatus.INTERNAL_SERVER_ERROR, 450014, "RSS 피드에서 이미지를 찾을 수 없습니다."), + ARTICLE_ID_EXTRACTION_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450015, "기사 ID 추출 중 오류가 발생했습니다."), MAPPER_NOT_FOUND(HttpStatus.INTERNAL_SERVER_ERROR, 450004, "요청한 매퍼를 찾을 수 없습니다."), ITEM_MAPPING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450005,"RSS 항목 매핑 중 오류가 발생했습니다."), + // 스크래퍼 관련 오류 + 
SCRAPER_NOT_FOUND(HttpStatus.INTERNAL_SERVER_ERROR, 450007, "요청한 스크래퍼를 찾을 수 없습니다."), + SCRAPER_CONNECTION_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450008, "기사 웹페이지 연결 중 오류가 발생했습니다."), + SCRAPER_PARSING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450009, "기사 내용 파싱 중 오류가 발생했습니다."), + EMPTY_ARTICLE_CONTENT(HttpStatus.INTERNAL_SERVER_ERROR, 450010, "스크래핑된 기사 내용이 없습니다."), + EMPTY_ARTICLE_IMAGE(HttpStatus.INTERNAL_SERVER_ERROR, 450011, "스크래핑된 기사 이미지가 없습니다."), + // 데이터베이스 관련 오류 DB_SAVE_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 530001, "RSS 뉴스를 저장하는 중 오류가 발생했습니다."), DUPLICATE_LINK_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450006, "이미 존재하는 링크입니다."); diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java index b717ad2..1e74a3a 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java @@ -22,9 +22,7 @@ import com.rometools.rome.feed.synd.SyndFeed; import com.rometools.rome.io.SyndFeedInput; import com.rometools.rome.io.XmlReader; - import lombok.extern.slf4j.Slf4j; - /** * RSS 피드 URL을 통해 XML 피드를 읽고 파싱하여 {@link SyndEntry} 목록으로 반환하는 Reader 클래스. * Rome 라이브러리를 이용하여 RSS를 파싱하며, 유효하지 않은 URL 또는 파싱 오류에 대해 예외를 처리한다. @@ -43,83 +41,81 @@ public RssFeedReader(NewsInfoJpaRepository rssNewsRepository) { this.rssNewsRepository = rssNewsRepository; } - /** - * 주어진 피드 URL로부터 RSS 피드를 파싱하고, 최신 발행일 이후의 {@link SyndEntry} 리스트를 반환한다. 
- * - * @param feedUrl RSS 피드의 URL 문자열 - * @param mapperType 매퍼 타입 (언론사 코드) - * @return 파싱 및 필터링된 SyndEntry 목록 - * @modified 2025-05-18 최신 발행일 이후 데이터만 필터링하는 기능 추가 - * @author 함예정 - * @since 2025-05-10 - */ - public List getFeed(String feedUrl, String mapperType) { - URL url = getURL(feedUrl); - URLConnection connection = openConnectionWithTimeout(url); - List entries = parseRssEntries(connection); - - LocalDateTime latestPubDate = getLatestPubDate(mapperType); - - List filteredEntries = entries.stream() - .filter(entry -> isAfterLatestPubDate(entry, latestPubDate)) - .collect(Collectors.toList()); - - return filteredEntries; - } - - /** - * 언론사별 최신 발행일 조회 (캐싱 추가) - * - * @param mapperType 매퍼 타입 (언론사 코드) - * @return 최신 발행일 또는 기본값 - */ - private LocalDateTime getLatestPubDate(String mapperType) { - String guidPrefix = mapperType.toUpperCase(); - - LocalDateTime latestPubDate = rssNewsRepository.findLatestPubDateByGuidPrefix(guidPrefix); - - if (latestPubDate == null) { + /** + * 주어진 피드 URL로부터 RSS 피드를 파싱하고, 최신 발행일 이후의 {@link SyndEntry} 리스트를 반환한다. 
+ * + * @param feedUrl RSS 피드의 URL 문자열 + * @param mapperType 매퍼 타입 (언론사 코드) + * @return 파싱 및 필터링된 SyndEntry 목록 + * @modified 2025-05-18 최신 발행일 이후 데이터만 필터링하는 기능 추가 + * @author 함예정 + * @since 2025-05-10 + */ + public List getFeed(String feedUrl, String mapperType) { + URL url = getURL(feedUrl); + URLConnection connection = openConnectionWithTimeout(url); + List entries = parseRssEntries(connection); + + LocalDateTime latestPubDate = getLatestPubDate(mapperType); + + List filteredEntries = entries.stream() + .filter(entry -> isAfterLatestPubDate(entry, latestPubDate)) + .collect(Collectors.toList()); + + return filteredEntries; + } + + /** + * 언론사별 최신 발행일 조회 (캐싱 추가) + * + * @param mapperType 매퍼 타입 (언론사 코드) + * @return 최신 발행일 또는 기본값 + */ + private LocalDateTime getLatestPubDate(String mapperType) { + LocalDateTime latestPubDate = rssNewsRepository.findLatestPubDateByGuidPrefix(mapperType); + + if (null == latestPubDate) { latestPubDate = getDefaultPubDate(); } - lastProcessedDateMap.put(mapperType, latestPubDate); - return latestPubDate; - } - - private LocalDateTime getDefaultPubDate() { - LocalDateTime latestPubDate = LocalDateTime.now().minusDays(1); - return latestPubDate; - } - - /** - * 항목의 발행일이 최신 발행일보다 이후인지 확인 - * - * @param entry RSS 항목 - * @param latestPubDate 최신 발행일 - * @return 최신 발행일 이후면 true - */ - private boolean isAfterLatestPubDate(SyndEntry entry, LocalDateTime latestPubDate) { - if (entry.getPublishedDate() == null) { - log.debug("발행일 없음 - 항목 제외: {}", entry.getTitle()); - return false; - } - - LocalDateTime pubDate = convertToLocalDateTime(entry.getPublishedDate()); - - boolean isAfter = pubDate.isAfter(latestPubDate); - - return isAfter; - } - - /** - * Date 객체를 LocalDateTime으로 변환 - * - * @param date 변환할 Date 객체 - * @return 변환된 LocalDateTime - */ - private LocalDateTime convertToLocalDateTime(Date date) { - return date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); - } + lastProcessedDateMap.put(mapperType, latestPubDate); + return 
latestPubDate; + } + + private LocalDateTime getDefaultPubDate() { + LocalDateTime latestPubDate = LocalDateTime.now().minusDays(1); + return latestPubDate; + } + + /** + * 항목의 발행일이 최신 발행일보다 이후인지 확인 + * + * @param entry RSS 항목 + * @param latestPubDate 최신 발행일 + * @return 최신 발행일 이후면 true + */ + private boolean isAfterLatestPubDate(SyndEntry entry, LocalDateTime latestPubDate) { + if (null == entry.getPublishedDate()) { + log.debug("발행일 없음 - 항목 제외: {}", entry.getTitle()); + return false; + } + + LocalDateTime pubDate = convertToLocalDateTime(entry.getPublishedDate()); + + boolean isAfter = pubDate.isAfter(latestPubDate); + + return isAfter; + } + + /** + * Date 객체를 LocalDateTime으로 변환 + * + * @param date 변환할 Date 객체 + * @return 변환된 LocalDateTime + */ + private LocalDateTime convertToLocalDateTime(Date date) { + return date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); + } /** * 문자열 형태의 URL을 {@link URL} 객체로 변환한다. @@ -127,8 +123,8 @@ private LocalDateTime convertToLocalDateTime(Date date) { * @param feedUrl 문자열 형태의 URL * @return URL 객체 * @throws RuntimeException 유효하지 않은 URL 형식일 경우 - * @author 함예정 * @since 2025-05-10 + * @author 함예정 */ private URL getURL(String feedUrl) { try { diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java index ffb9f35..1a9cec2 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java @@ -5,12 +5,14 @@ import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import 
com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.result.ScrapingResult; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.HtmlParser; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.ParagraphUtil; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; import com.rometools.rome.feed.synd.SyndContent; import com.rometools.rome.feed.synd.SyndEntry; -import org.springframework.beans.factory.annotation.Autowired; import java.io.IOException; import java.time.LocalDateTime; @@ -26,77 +28,62 @@ * @since 2025-05-13 최초 작성 * @modified 2025-05-15 의존성 주입 방식 개선 (템플릿 메서드 패턴 적용) */ + public abstract class AbstractRssMapper { protected abstract ScraperFactory getScraperFactory(); - private static final ObjectMapper objectMapper = new ObjectMapper(); /** - * RSS 피드를 ArticleEntity 엔티티로 변환 + * 매퍼의 유형을 식별하는 코드 반환 + * 소문자 언론사 코드 형태 (예: "km", "da", "kh") + */ + public abstract String getMapperType(); + + /** + * RSS 피드를 ArticleEntity 엔티티로 변환하는 템플릿 메소드 * * @param entry 변환할 SyndEntry(Rss 데이터) 객체 * @param source RSS 소스 정보 * @return 변환된 ArticleEntity 엔티티 */ - public ArticleEntity mapToRssNews(SyndEntry entry, RssSource source) { - ArticleInfo info = extractBasicInfo(entry, source); - - String content = determineContent(info.description, info.link, source); - - return buildArticleEntity( - info.title, - info.link, - info.pubDate, - info.guid, - content, - 
info.category, - info.imageUrl); - } - - /** - * RSS 항목에서 기본 정보 추출 - */ - private ArticleInfo extractBasicInfo(SyndEntry entry, RssSource source) { - return new ArticleInfo( - extractTitle(entry), - extractLink(entry), - extractPubDate(entry), - extractGuid(entry, source), - extractDescription(entry), - extractCategory(entry, source), - extractImageUrl(entry) - ); - } + public final ArticleEntity mapToRssNews(SyndEntry entry, RssSource source) { + String guid = extractGuid(entry, source); + String title = extractTitle(entry); + String link = extractLink(entry); + LocalDateTime pubDate = extractPubDate(entry); + String category = extractCategory(entry, source); + String imageUrl = extractImageUrl(entry); - /** - * 본문 내용 결정 (RSS 또는 스크래핑) - */ - private String determineContent(String description, String link, RssSource source) { - if (source.hasFullContent()) { - return description; - } + String baseDescription = extractDescription(entry); + ScrapingResult result = performSpecificMapping(entry, source, link, baseDescription, imageUrl); - return getContentWithScraping(description, link, source.getMapperType()); + return ArticleEntity.builder() + .title(title) + .link(link) + .pubDate(pubDate) + .category(category) + .guid(guid) + .description(result.getDescription()) + .imageUrl(result.getImageUrl()) + .build(); } /** - * 기사 기본 정보를 담는 내부 클래스 - */ - private record ArticleInfo( - String title, + * 매퍼 유형에 따른 처리를 수행하는 추상 메소드 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @param link 기사 링크 + * @param baseDescription RSS에서 추출한 기본 설명 + * @param baseImageUrl RSS에서 추출한 기본 이미지 URL + * @return 매핑 결과 (설명과 이미지 URL) + */ + protected abstract ScrapingResult performSpecificMapping( + SyndEntry entry, + RssSource source, String link, - LocalDateTime pubDate, - String guid, - String description, - String category, - String imageUrl - ) {} - - /** - * 매퍼의 유형을 식별하는 코드 반환 - * 소문자 언론사 코드 형태 (예: "km", "da", "kh") - */ - public abstract String getMapperType(); + String 
baseDescription, + String baseImageUrl); /** * Date 객체를 LocalDateTime으로 변환 @@ -105,7 +92,7 @@ private record ArticleInfo( * @return 변환된 LocalDateTime 객체, date가 null이면 현재 시간 반환 */ protected LocalDateTime convertToLocalDateTime(Date date) { - return date != null + return (null != date) ? date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime() : LocalDateTime.now(); } @@ -144,7 +131,7 @@ protected LocalDateTime extractPubDate(SyndEntry entry) { * @return 추출된 설명 */ protected String extractDescription(SyndEntry entry) { - return entry.getDescription() != null ? entry.getDescription().getValue() : null; + return null != entry.getDescription() ? entry.getDescription().getValue() : null; } /** @@ -163,54 +150,6 @@ protected String extractImageUrl(SyndEntry entry) { .orElse(""); } - /** - * 본문 내용을 가져오는 메서드 - * - * @param originalDescription RSS에서 추출한 기본 설명 - * @param link 기사 URL - * @param mapperType 매퍼 타입 - * @return 최종 본문 내용 - */ - private String getContentWithScraping(String originalDescription, String link, String mapperType) { - ContentScraper scraper = findScraper(mapperType); - return scrapeContent(scraper, link, originalDescription); - } - - /** - * 매퍼 타입에 맞는 스크래퍼를 찾음 - * - * @param mapperType 매퍼 타입 - * @return 스크래퍼 객체 - * @throws ArticleCollectorException 스크래퍼를 찾을 수 없는 경우 - */ - private ContentScraper findScraper(String mapperType) { - return getScraperFactory().getScraper(mapperType) - .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.MAPPER_NOT_FOUND)); - } - - /** - * 스크래퍼를 사용하여 콘텐츠 스크래핑 수행 - * - * @param scraper 스크래퍼 객체 - * @param link 기사 URL - * @param fallbackContent 스크래핑 실패 시 사용할 대체 콘텐츠 - * @return 스크래핑된 콘텐츠 또는 대체 콘텐츠 - */ - private String scrapeContent(ContentScraper scraper, String link, String fallbackContent) { - try { - String scrapedContent = scraper.scrapeContent(link); - return scrapedContent != null && !scrapedContent.isEmpty() - ? 
scrapedContent - : fallbackContent; - } catch (ArticleCollectorException e) { - throw e; - } catch (IllegalArgumentException e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.INVALID_JOB_PARAMETER, e); - } catch (Exception e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); - } - } - /** * 카테고리 추출 메서드 * @@ -229,7 +168,32 @@ protected String extractCategory(SyndEntry entry, RssSource source) { * @param source RSS 소스 정보 * @return GUID */ - protected abstract String extractGuid(SyndEntry entry, RssSource source); + /** + * GUID 추출을 위한 템플릿 메소드 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @return 신문사 코드 + 고유 ID 형태의 GUID + * @throws ArticleCollectorException 링크가 없거나 ID 추출 실패 시 + */ + protected final String extractGuid(SyndEntry entry, RssSource source) { + validateEntryLink(entry.getLink()); + + String uniqueId = extractUniqueIdFromLink(entry.getLink()); + validateUniqueId(uniqueId); + + return source.getCodePrefix() + uniqueId; + } + + /** + * 링크에서 고유 ID를 추출하는 추상 메소드 + * 각 매퍼가 자신의 URL 패턴에 맞게 구현 + * + * @param link 기사 링크 + * @return 추출된 고유 ID + * @throws ArticleCollectorException 링크가 null이거나 ID를 추출할 수 없는 경우 + */ + protected abstract String extractUniqueIdFromLink(String link); private ArticleEntity buildArticleEntity(String title, String link, LocalDateTime pubDate, String guid, String description, String category, String imageUrl) { @@ -245,89 +209,46 @@ private ArticleEntity buildArticleEntity(String title, String link, LocalDateTim } /** - * HTML 문자열에서 모든 태그를 제거하고 문단을 추출하는 공통 메서드 + * 링크가 null이거나 비어있는지 검증 * - * @param html HTML 문자열 - * @return 정제된 문단 리스트 + * @param link 검증할 링크 + * @throws ArticleCollectorException 링크가 유효하지 않을 경우 */ - protected List extractCleanParagraphs(String html) { - if (html == null || html.isEmpty()) { - return new ArrayList<>(); - } - - try { - String withBreaks = html.replaceAll("", "PARAGRAPH_BREAK"); - String noTags = withBreaks.replaceAll("<[^>]*>", ""); - String 
decoded = noTags.replace(" ", " ") - .replace(" ", " ") - .replace("<", "<") - .replace(">", ">") - .replace("&", "&") - .replace(""", "\"") - .replace("'", "'"); - - decoded = decoded.replaceAll("\\s+", " ").trim(); - String[] paragraphs = decoded.split("PARAGRAPH_BREAK"); - - return Arrays.stream(paragraphs) - .map(String::trim) - .filter(p -> !p.isEmpty()) - .collect(Collectors.toList()); - } catch (Exception e) { - List fallback = new ArrayList<>(); - fallback.add(removeAllHtmlTags(html)); - return fallback; + private void validateEntryLink(String link) { + if (isNullOrEmpty(link)) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR); } } /** - * 모든 HTML 태그 제거하는 공통 메서드 + * 고유 ID가 null이거나 비어있는지 검증 * - * @param html HTML 문자열 - * @return 태그가 제거된 문자열 + * @param uniqueId 검증할 고유 ID + * @throws ArticleCollectorException 고유 ID가 유효하지 않을 경우 */ - protected String removeAllHtmlTags(String html) { - if (html == null || html.isEmpty()) { - return ""; + private void validateUniqueId(String uniqueId) { + if (isNullOrEmpty(uniqueId)) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR); } - - String noTags = html.replaceAll("<[^>]*>", ""); - String decoded = decodeHtmlEntities(noTags); - - return decoded.replaceAll("\\s+", " ").trim(); } /** - * HTML 엔티티를 디코딩하는 유틸리티 메서드 + * 문자열이 null이거나 비어있는지 확인 * - * @param text HTML 엔티티가 포함된 문자열 - * @return 디코딩된 문자열 + * @param str 확인할 문자열 + * @return null이거나 비어있으면 true, 그렇지 않으면 false */ - protected String decodeHtmlEntities(String text) { - if (text == null || text.isEmpty()) { - return ""; - } - - return text.replace(" ", " ") - .replace(" ", " ") - .replace("<", "<") - .replace(">", ">") - .replace("&", "&") - .replace(""", "\"") - .replace("'", "'"); + private boolean isNullOrEmpty(String str) { + return null == str || str.trim().isEmpty(); } /** - * 문단 리스트를 JSON으로 직렬화하는 공통 메서드 + * 리스트가 null이거나 비어있는지 확인 * - * @param paragraphs 문단 리스트 - * @return JSON 문자열 + 
* @param list 확인할 리스트 + * @return null이거나 비어있으면 true, 그렇지 않으면 false */ - protected String serializeParagraphs(List paragraphs) { - try { - return objectMapper.writeValueAsString(paragraphs); - } catch (JsonProcessingException e) { - return String.join("\n\n", paragraphs); - } + private boolean isNullOrEmptyList(List list) { + return null == list || list.isEmpty(); } } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java index d8a0ba5..8e9c976 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java @@ -1,30 +1,40 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.result.ScrapingResult; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; import com.rometools.rome.feed.synd.SyndEntry; +import groovy.util.logging.Slf4j; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; +import java.time.LocalDateTime; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; /** * 동아일보 RSS 매퍼 구현체 + * HTML에서 문단을 추출하고 PARAGRAPH_BREAK로 구분하여 반환한다. * * @author 양병학 * @since 2025-05-10 최초 작성 * @modified 2025-05-15 템플릿 메서드 패턴 적용, 의존성 주입 방식 개선 - * @modified 2025-05-17 HTML 태그 제거 및 문단 직렬화 기능 추가 + * @modified 2025-05-17 HTML 태그 제거 및 문단 구분 기능 추가 */ +@Slf4j @Component public class DongaRssMapper extends AbstractRssMapper { @@ -36,9 +46,10 @@ public DongaRssMapper(ScraperFactory scraperFactory) { } /** - * 템플릿 메서드 패턴 + * 템플릿 메서드 패턴에서 사용할 ScraperFactory 반환 * * @return 주입받은 ScraperFactory 인스턴스 + * @since 2025-05-15 */ @Override protected ScraperFactory getScraperFactory() { @@ -49,6 +60,7 @@ protected ScraperFactory getScraperFactory() { * 매퍼 타입 반환 * * @return 매퍼 타입 (da) + * @since 2025-05-10 */ @Override public String getMapperType() { @@ -56,86 +68,124 @@ public String getMapperType() { } /** - * GUID 추출, URI를 GUID로 사용 + * 카테고리 정보 추출 * * @param entry RSS 항목 * @param source RSS 소스 정보 - * @return URI 또는 생성된 고유 ID + * @return 카테고리 + * @since 2025-05-10 */ @Override - protected String extractGuid(SyndEntry entry, RssSource source) { - String uniqueId = extractUniqueIdFromLink(entry.getLink()); - return source.getCodePrefix() + uniqueId; + protected String extractCategory(SyndEntry entry, RssSource source) { + return source.getCategoryName(); } /** - * 
동아일보 링크에서 고유 ID 추출 + * 본문만 스크래핑 하는 메소드 * - * @param link 기사 링크 - * @return 추출된 고유 ID - * @throws ArticleCollectorException 링크가 null이거나 ID를 추출할 수 없는 경우 + * @return ScrapingResult 객체 (스크래핑 정보) + * @since 2025-05-17 */ - private String extractUniqueIdFromLink(String link) { - if (link == null || link.trim().isEmpty()) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); - } - - try { - String[] parts = link.split("/"); - if (parts.length >= 2) { - return parts[parts.length - 2]; - } - } catch (Exception e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR, e); - } - - throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); + @Override + protected ScrapingResult performSpecificMapping( + SyndEntry entry, + RssSource source, + String link, + String baseDescription, + String baseImageUrl) { + + String scrapedContent = scrapeContent(link); + + return new ScrapingResult(scrapedContent, baseImageUrl); } /** - * 카테고리 정보 추출 + * 본문 스크래핑을 진행하는 메소드 * - * @param entry RSS 항목 - * @param source RSS 소스 정보 - * @return 결합된 카테고리 문자열 + * @return 스크래핑된 본문 String + * @author 양병학 + * @since 2025-05-17 */ - @Override - protected String extractCategory(SyndEntry entry, RssSource source) { - return source.getCategoryName(); + private String scrapeContent(String link) { + ContentScraper scraper = getScraperOrThrow(); + String scrapedContent = scrapeAndValidateContent(scraper, link); + return removeUnwantedPhrases(scrapedContent); + } + + private ContentScraper getScraperOrThrow() { + return getScraperFactory().getScraper(getMapperType()) + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_NOT_FOUND)); + } + + private String scrapeAndValidateContent(ContentScraper scraper, String link) { + String scrapedContent = scraper.scrapeContent(link); + validateScrapedContent(scrapedContent); + return scrapedContent; + } + + private void validateScrapedContent(String content) { 
+ Optional.ofNullable(content) + .filter(c -> !c.isEmpty()) + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_CONTENT)); } /** - * RSS description에서 HTML 태그를 제거하고 문단을 추출하여 직렬화 + * 동아일보 링크에서 고유 ID 추출 * - * @param entry RSS 항목 - * @return 직렬화된 문단 JSON 또는 원본 description + * @param link 기사 링크 + * @return 추출된 고유 ID + * @throws ArticleCollectorException 링크가 null이거나 ID를 추출할 수 없는 경우 + * @since 2025-05-10 */ @Override - protected String extractDescription(SyndEntry entry) { - if (entry.getDescription() == null) { - return ""; - } + protected String extractUniqueIdFromLink(String link) { + validateLink(link); + return extractIdFromParts(link); + } - String rawDescription = entry.getDescription().getValue(); - if (rawDescription == null || rawDescription.isEmpty()) { - return ""; - } + private void validateLink(String link) { + Optional.ofNullable(link) + .map(String::trim) + .filter(l -> !l.isEmpty()) + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR)); + } + + private String extractIdFromParts(String link) { + return Optional.of(link.split("/")) + .filter(parts -> parts.length >= 2) + .map(parts -> parts[parts.length - 2]) + .filter(this::isValidId) + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR)); + } - return processHtmlContent(rawDescription); + private boolean isValidId(String id) { + return id != null && !id.trim().isEmpty(); } /** - * HTML 콘텐츠 처리하여 정제된 문단 직렬화 + * 불용어 제거 메서드 + * 저작권 문구, 광고 문구 등 불필요한 문구 제거 * - * @param htmlContent HTML 콘텐츠 - * @return 직렬화된 문단 JSON 또는 태그가 제거된 텍스트 + * @param content 원본 내용 + * @return 불용어가 제거된 내용 + * @since 2025-05-17 */ - private String processHtmlContent(String htmlContent) { - try { - List paragraphs = extractCleanParagraphs(htmlContent); - return serializeParagraphs(paragraphs); - } catch (Exception e) { - return removeAllHtmlTags(htmlContent); - } + private static final List 
UNWANTED_PATTERNS = List.of( + "\\(c\\)\\s*동아일보", + "저작권자.*동아일보.*무단.*전재.*금지", + "무단전재 및 재배포 금지", + "\\S+기자\\s+\\S+@donga\\.com", + "동아닷컴 뉴스스탠드", + "동아일보 홈페이지", + "PARAGRAPH_BREAKPARAGRAPH_BREAK" + ); + + protected String removeUnwantedPhrases(String content) { + return Optional.ofNullable(content) + .filter(c -> !c.isEmpty()) + .map(c -> UNWANTED_PATTERNS.stream() + .reduce(c, (current, regex) -> current.replaceAll(regex, ""))) + .map(String::trim) + .orElse(""); } } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java index 875bb38..8e2e197 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java @@ -1,36 +1,35 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.result.ScrapingResult; import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; import com.rometools.rome.feed.synd.SyndEntry; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; /** * 경향신문 RSS 매퍼 구현체 + * ContentScraper를 사용하여 기사 본문 스크래핑 * * @author 양병학 * @since 2025-05-10 최초 작성 * @modified 2025-05-15 템플릿 메서드 패턴 적용, 의존성 주입 방식 개선 - * @modified 2025-05-17 mapToRssNews 메서드 오버라이드 및 문단 직렬화 기능 추가 + * @modified 2025-05-17 스크래핑 로직 추가 및 불용어 제거 기능 추가 */ @Component public class KhanRssMapper extends AbstractRssMapper { + private static final Logger log = LoggerFactory.getLogger(KhanRssMapper.class); private final ScraperFactory scraperFactory; @Autowired @@ -39,135 +38,139 @@ public KhanRssMapper(ScraperFactory scraperFactory) { } /** - * 템플릿 메서드 패턴 + * 매퍼 타입 반환 * - * @return 주입받은 ScraperFactory 인스턴스 + * @return 매퍼 타입 (kh: 경향신문) + * @since 2025-05-10 */ @Override - protected ScraperFactory getScraperFactory() { - return this.scraperFactory; + public String getMapperType() { + return "kh"; } /** - * RSS 피드를 ArticleEntity 엔티티로 변환 (오버라이드) - * 경향신문 특화 구현 - 본문과 이미지 URL을 효율적으로 스크래핑하고 문단 직렬화 + * 본문 + 이미지 링크를 스크래핑 하는 메소드 * - * @param entry 변환할 SyndEntry(Rss 데이터) 객체 - * @param source RSS 소스 정보 - * @return 변환된 ArticleEntity 엔티티 + * @return ScrapingResult 객체 (스크래핑 정보) + * @since 2025-05-17 */ @Override - public ArticleEntity mapToRssNews(SyndEntry entry, RssSource source) { - String title = extractTitle(entry); - String link = extractLink(entry); - LocalDateTime pubDate = 
extractPubDate(entry); - String guid = extractGuid(entry, source); - String description = extractDescription(entry); - String category = extractCategory(entry, source); - String imageUrl = super.extractImageUrl(entry); - - ContentResult contentResult; - if (source.hasFullContent()) { - contentResult = new ContentResult(description, imageUrl); - } else { - contentResult = scrapeContentAndImage(link, description, imageUrl); + protected ScrapingResult performSpecificMapping( + SyndEntry entry, + RssSource source, + String link, + String baseDescription, + String baseImageUrl) { + + ContentScraper scraper = getContentScraper(); + + String scrapedContent = scrapeAndProcessContent(scraper, link); + + String finalImageUrl = baseImageUrl; + if (finalImageUrl == null || finalImageUrl.isEmpty()) { + finalImageUrl = scrapeImageUrl(scraper, link); } - return ArticleEntity.builder() - .title(title) - .link(link) - .pubDate(pubDate) - .category(category) - .guid(guid) - .description(contentResult.getContent()) - .imageUrl(contentResult.getImageUrl()) - .build(); + return new ScrapingResult(scrapedContent, finalImageUrl); } /** - * 본문과 이미지 URL을 스크래핑하고 처리하는 메서드 + * 경향신문 스크래퍼 가져오기 * - * @param link 기사 URL - * @param fallbackDescription 스크래핑 실패 시 사용할 설명 - * @param fallbackImageUrl 스크래핑 실패 시 사용할 이미지 URL - * @return 처리된 콘텐츠와 이미지 URL이 포함된 결과 객체 + * @return 경향신문 ContentScraper + * @throws ArticleCollectorException 스크래퍼를 찾을 수 없는 경우 + * @since 2025-05-17 */ - private ContentResult scrapeContentAndImage(String link, String fallbackDescription, String fallbackImageUrl) { - try { - ContentScraper scraper = getScraperFactory().getScraper(getMapperType()) - .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.MAPPER_NOT_FOUND)); - - String content = fallbackDescription; - String scrapedContent = scraper.scrapeContent(link); - if (scrapedContent != null && !scrapedContent.isEmpty()) { - List paragraphs = Arrays.asList(scrapedContent.split("\n\n")); - content = 
serializeParagraphs(paragraphs); - } - - String imageUrl = fallbackImageUrl; - if (imageUrl == null || imageUrl.isEmpty()) { - imageUrl = scraper.scrapeImageUrl(link); - } - - return new ContentResult(content, imageUrl); - } catch (Exception e) { - System.err.println("경향신문 스크래핑 실패: " + e.getMessage()); - return new ContentResult(fallbackDescription, fallbackImageUrl); - } + private ContentScraper getContentScraper() { + return scraperFactory.getScraper("kh") + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_NOT_FOUND)); } /** - * 콘텐츠 결과를 담는 내부 클래스 + * 기사 콘텐츠 스크래핑 및 처리 + * + * @param scraper 사용할 ContentScraper + * @param link 기사 링크 + * @return 스크래핑되고 처리된 콘텐츠 + * @throws ArticleCollectorException 스크래핑 실패 시 (내용이 비어있음) + * @since 2025-05-17 */ - private static class ContentResult { - private final String content; - private final String imageUrl; - - public ContentResult(String content, String imageUrl) { - this.content = content; - this.imageUrl = imageUrl; - } + private String scrapeAndProcessContent(ContentScraper scraper, String link) { + String scrapedContent = scrapeContent(scraper, link); + validateScrapedContent(scrapedContent); + return removeUnwantedPhrases(scrapedContent); + } - public String getContent() { - return content; - } + private String scrapeContent(ContentScraper scraper, String link) { + return scraper.scrapeContent(link); + } - public String getImageUrl() { - return imageUrl; + private void validateScrapedContent(String content) { + if (null == content || content.isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_CONTENT); } } /** - * 매퍼 타입 반환 + * 이미지 URL 스크래핑 * - * @return 매퍼 타입 (kh) + * @param scraper 사용할 ContentScraper + * @param link 기사 링크 + * @return 스크래핑된 이미지 URL + * @throws ArticleCollectorException 스크래핑 실패 시 + * @since 2025-05-17 */ - @Override - public String getMapperType() { - return "kh"; + private String scrapeImageUrl(ContentScraper scraper, String link) { + 
String imageUrl = scraper.scrapeImageUrl(link); + validateImageUrl(imageUrl); + return imageUrl; + } + + private void validateImageUrl(String imageUrl) { + if (null == imageUrl || imageUrl.isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_IMAGE); + } } + + /** - * GUID 추출, 링크에서 기사 ID를 추출하여 사용 + * 템플릿 메서드 패턴에서 사용할 ScraperFactory 반환 * - * @param entry RSS 항목 - * @param source RSS 소스 정보 - * @return 신문사 코드 + 기사 ID 형태의 GUID + * @return 주입받은 ScraperFactory 인스턴스 + * @since 2025-05-15 */ @Override - protected String extractGuid(SyndEntry entry, RssSource source) { - String uniqueId = extractUniqueIdFromLink(entry.getLink()); - return source.getCodePrefix() + uniqueId; + protected ScraperFactory getScraperFactory() { + return this.scraperFactory; } /** - * 경향신문 링크에서 고유 ID 추출 + * 불용어 제거 메서드 + * 저작권 문구, 광고 문구 등 불필요한 문구 제거 * - * @param link 기사 링크 - * @return 추출된 고유 ID - * @throws ArticleCollectorException 링크가 null이거나 ID를 추출할 수 없는 경우 + * @param content 원본 내용 + * @return 불용어가 제거된 내용 + * @since 2025-05-17 */ - private String extractUniqueIdFromLink(String link) { + private String removeUnwantedPhrases(String content) { + if (content == null || content.isEmpty()) { + return ""; + } + + content = content.replaceAll("\\(c\\)\\s*경향신문", ""); + content = content.replaceAll("저작권자.*경향신문.*무단.*전재.*금지", ""); + content = content.replaceAll("무단전재 및 재배포 금지", ""); + content = content.replaceAll("\\S+기자\\s+\\S+@khan\\.co\\.kr", ""); + content = content.replaceAll("경향신문 뉴스스탠드", ""); + content = content.replaceAll("경향닷컴", ""); + content = content.replaceAll("PARAGRAPH_BREAKPARAGRAPH_BREAK", "PARAGRAPH_BREAK"); + + return content.trim(); + } + + @Override + protected String extractUniqueIdFromLink(String link) { validateLink(link); try { @@ -183,6 +186,7 @@ private String extractUniqueIdFromLink(String link) { * * @param link 검사할 링크 * @throws ArticleCollectorException 링크가 null이거나 비어있는 경우 + * @since 2025-05-10 */ private void validateLink(String link) { if 
(link == null || link.trim().isEmpty()) { @@ -195,6 +199,7 @@ private void validateLink(String link) { * * @param link 분리할 링크 * @return 경로 부분 배열 + * @since 2025-05-10 */ private String[] splitLinkPath(String link) { return link.split("/"); @@ -206,14 +211,17 @@ private String[] splitLinkPath(String link) { * @param pathParts 경로 부분 배열 * @return 기사 ID * @throws ArticleCollectorException 기사 ID를 찾을 수 없는 경우 + * @since 2025-05-10 */ private String findArticleIdInPath(String[] pathParts) { - for (int i = 0; i < pathParts.length; i++) { - if ("article".equals(pathParts[i]) && i + 1 < pathParts.length) { - String id = pathParts[i + 1]; - if (isValidArticleId(id)) { - return id; - } + for (int i = 0; i < pathParts.length - 1; i++) { + if (!"article".equals(pathParts[i])) { + continue; + } + + String id = pathParts[i + 1]; + if (isValidArticleId(id)) { + return id; } } @@ -225,6 +233,7 @@ private String findArticleIdInPath(String[] pathParts) { * * @param id 검사할 ID * @return 유효성 여부 + * @since 2025-05-10 */ private boolean isValidArticleId(String id) { return id != null && !id.trim().isEmpty(); @@ -235,6 +244,7 @@ private boolean isValidArticleId(String id) { * * @param entry RSS 항목 * @return 발행일 LocalDateTime + * @since 2025-05-10 */ @Override protected LocalDateTime extractPubDate(SyndEntry entry) { @@ -250,6 +260,7 @@ protected LocalDateTime extractPubDate(SyndEntry entry) { * * @param entry RSS 항목 * @return 추출된 발행일, 없으면 현재 시간 + * @since 2025-05-10 */ private LocalDateTime extractDcDate(SyndEntry entry) { return entry.getForeignMarkup().stream() @@ -265,6 +276,7 @@ private LocalDateTime extractDcDate(SyndEntry entry) { * * @param dateString 날짜 문자열 * @return 파싱된 LocalDateTime, 실패 시 현재 시간 + * @since 2025-05-10 */ private LocalDateTime parseDateTime(String dateString) { try { @@ -280,9 +292,12 @@ private LocalDateTime parseDateTime(String dateString) { * @param entry RSS 항목 * @param source RSS 소스 정보 * @return 결합된 카테고리 문자열 + * @since 2025-05-10 */ @Override protected String 
extractCategory(SyndEntry entry, RssSource source) { return source.getCategoryName(); } + + } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java index bcd8f8b..3011a78 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java @@ -1,51 +1,57 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.result.ScrapingResult; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.HtmlParser; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.ParagraphUtil; import com.rometools.rome.feed.synd.SyndEntry; +import groovy.util.logging.Slf4j; import 
org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.stream.Collectors; /** * 국민일보 RSS 매퍼 구현체 + * HTML 태그를 제거하고 문단을 PARAGRAPH_BREAK로 구분하여 반환한다. * * @author 양병학 * @since 2025-05-10 최초 작성 * @modified 2025-05-15 템플릿 메서드 패턴 적용, 의존성 주입 방식 개선 - * @modified 2025-05-17 HTML 태그 제거 및 문단 직렬화 기능 추가 + * @modified 2025-05-17 HTML 태그 제거 및 문단 구분 기능 추가 */ +@Slf4j @Component public class KmibRssMapper extends AbstractRssMapper { private static final Pattern ARCID_PATTERN = Pattern.compile("arcid=([0-9]+)"); - private static final Pattern IMG_SRC_PATTERN = Pattern.compile(""" - paragraphs = htmlParser.extractCleanParagraphs(rawDescription); + validateParagraphs(paragraphs); + + return paragraphUtil.serializeParagraphs(paragraphs); } /** * 링크에서 arcid 값 추출 * * @param link 기사 링크 - * @return 추출된 arcid, 없으면 타임스탬프 반환 + * @return 추출된 arcid + * @throws ArticleCollectorException 링크가 null이거나 arcid 추출 실패 시 + * @since 2025-05-10 */ - private String extractArcIdFromLink(String link) { - if (link == null || link.trim().isEmpty()) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); - } + @Override + protected String extractUniqueIdFromLink(String link) { + validateLink(link); - Matcher matcher = ARCID_PATTERN.matcher(link); - if (matcher.find()) { - String arcId = matcher.group(1); - if (arcId != null && !arcId.trim().isEmpty()) { - return arcId; - } + String arcId = extractArcIdFromLink(link); + validateArcId(arcId); + + return arcId; + } + + private void validateLink(String link) { + if (null == link || link.trim().isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR); } + } - throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); + private void validateArcId(String arcId) { 
+ if (null == arcId || arcId.trim().isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR); + } } /** @@ -111,6 +149,7 @@ private String extractArcIdFromLink(String link) { * * @param entry RSS 항목 * @return 이미지 URL + * @since 2025-05-10 */ @Override protected String extractImageUrl(SyndEntry entry) { @@ -122,48 +161,111 @@ protected String extractImageUrl(SyndEntry entry) { return extractImageFromDescription(entry); } - /** - * Description 내용에서 이미지 URL을 추출 - * - * @param entry RSS 항목 - * @return 추출된 이미지 URL 또는 빈 문자열 - */ private String extractImageFromDescription(SyndEntry entry) { - if (entry.getDescription() == null) { + if (isEntryDescriptionEmpty(entry)) { return ""; } String description = entry.getDescription().getValue(); - if (description == null || description.isEmpty()) { + if (isNullOrEmpty(description)) { return ""; } - Matcher matcher = IMG_SRC_PATTERN.matcher(description); + return extractImageUrlFromHtml(description); + } + + /** + * HTML에서 이미지 URL을 추출합니다. + * + * @param html 이미지 URL을 추출할 HTML 문자열 + * @return 추출된 이미지 URL 또는 빈 문자열 + * @since 2025-05-18 + * @author 양병학 + */ + private String extractImageUrlFromHtml(String html) { + Matcher matcher = IMG_SRC_PATTERN.matcher(html); return matcher.find() ? matcher.group(1) : ""; } /** - * RSS description에서 HTML 태그를 제거하고 문단을 추출하여 직렬화 + * RSS description에서 HTML 태그를 제거하고 문단을 추출하여 PARAGRAPH_BREAK로 구분 + * + * 1. 설명 컨텐츠 존재 여부 확인 + * 2. HTML 태그 제거 및 문단 분리 + * 3. 
문단을 PARAGRAPH_BREAK로 구분하여 반환 * * @param entry RSS 항목 - * @return 직렬화된 문단 JSON 또는 원본 description + * @return PARAGRAPH_BREAK로 구분된 문단 텍스트 + * @throws ArticleCollectorException 설명이 비어있거나 파싱 중 오류 발생 시 + * @since 2025-05-10 + * @author 양병학 + * @modified 2025-05-17 HTML 태그 제거 및 문단 구분 기능 추가 + * @modified 2025-05-18 예외 처리 로직 개선 */ @Override protected String extractDescription(SyndEntry entry) { - if (entry.getDescription() == null) { - return ""; - } + validateDescriptionExists(entry); String rawDescription = entry.getDescription().getValue(); - if (rawDescription == null || rawDescription.isEmpty()) { - return ""; - } + validateRawDescription(rawDescription); try { - List paragraphs = extractCleanParagraphs(rawDescription); - return serializeParagraphs(paragraphs); + List paragraphs = htmlParser.extractCleanParagraphs(rawDescription); + validateParagraphs(paragraphs); + return paragraphUtil.serializeParagraphs(paragraphs); } catch (Exception e) { - return removeAllHtmlTags(rawDescription); + throw new ArticleCollectorException(ArticleCollectorErrorCode.RSS_PARSING_ERROR, e); + } + } + + private void validateDescriptionExists(SyndEntry entry) { + if (null == entry.getDescription()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.RSS_CONTENT_EMPTY); + } + } + + private void validateRawDescription(String rawDescription) { + if (null == rawDescription || rawDescription.isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.RSS_CONTENT_EMPTY); + } + } + + private void validateParagraphs(List paragraphs) { + if (paragraphs.isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.RSS_CONTENT_EMPTY); + } + } + + /** + * 링크에서 arcId를 추출합니다. 
+ * + * @param link arcId를 추출할 링크 + * @return 추출된 arcId + * @throws ArticleCollectorException arcId를 추출할 수 없는 경우 발생 + * @since 2025-05-18 + * @author 양병학 + */ + private String extractArcIdFromLink(String link) { + Matcher matcher = ARCID_PATTERN.matcher(link); + if (!matcher.find()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR); } + return matcher.group(1); + } + + private boolean isEntryDescriptionEmpty(SyndEntry entry) { + return entry.getDescription() == null; + } + + /** + * 문자열이 null이거나 비어있는지 확인합니다. + * + * @param str 확인할 문자열 + * @return null이거나 비어있으면 true, 그렇지 않으면 false + */ + private boolean isNullOrEmpty(String str) { + return null == str || str.isEmpty(); } + + } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/result/ScrapingResult.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/result/ScrapingResult.java new file mode 100644 index 0000000..1b53272 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/result/ScrapingResult.java @@ -0,0 +1,19 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.result; + +public class ScrapingResult { + private final String description; + private final String imageUrl; + + public ScrapingResult(String description, String imageUrl) { + this.description = description; + this.imageUrl = imageUrl; + } + + public String getDescription() { + return description; + } + + public String getImageUrl() { + return imageUrl; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java 
index 6ce2fb9..74e6511 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java @@ -2,6 +2,7 @@ import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import lombok.extern.slf4j.Slf4j; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -16,7 +17,6 @@ * @since 2025-05-13 최초 작성 */ public interface ContentScraper { - /** * 뉴스 URL에서 본문 내용을 문단 단위로 스크래핑 * @@ -27,10 +27,10 @@ public interface ContentScraper { List scrapeParagraphs(String url) throws ArticleCollectorException; /** - * 뉴스 URL에서 본문 내용을 텍스트로 스크래핑 + * 기사 URL에서 본문 내용을 PARAGRAPH_BREAK로 구분된 문자열로 스크래핑 * - * @param url 뉴스 URL - * @return 스크래핑된 본문 + * @param url 기사 URL + * @return PARAGRAPH_BREAK로 구분된 본문 문자열 * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 */ String scrapeContent(String url) throws ArticleCollectorException; @@ -40,8 +40,9 @@ public interface ContentScraper { * * @param url 뉴스 URL * @return 스크래핑된 이미지 URL + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 */ - String scrapeImageUrl(String url); + String scrapeImageUrl(String url) throws ArticleCollectorException; /** * 스크래퍼가 지원하는 Mapper type 반환 @@ -50,15 +51,14 @@ public interface ContentScraper { */ String getSupportedMapperType(); - /** * URL에 연결하여 Document 객체 반환 (기본 구현) * * @param url 연결할 URL * @return 파싱된 JSoup Document - * @throws ArticleCollectorException 연결 오류 발생 시 FEED_PARSING_ERROR 예외 발생 + * @throws ArticleCollectorException 연결 오류 발생 시 SCRAPER_CONNECTION_ERROR 예외 발생 */ - default Document connectToUrl(String url) { + default Document connectToUrl(String url) throws ArticleCollectorException { try { return Jsoup.connect(url) .userAgent("Mozilla/5.0 (Windows NT 10.0; 
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") @@ -68,7 +68,7 @@ default Document connectToUrl(String url) { .followRedirects(true) .get(); } catch (IOException e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_CONNECTION_ERROR, e); } } } \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java index 57a2985..91cebdf 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java @@ -1,5 +1,7 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; @@ -41,6 +43,10 @@ public ScraperFactory(List availableScrapers) { * @return 해당 타입의 스크래퍼 or null일시 Optional로 빈 값 반환 */ public Optional getScraper(String mapperType) { - return Optional.ofNullable(scrapers.get(mapperType)); + ContentScraper scraper = scrapers.get(mapperType); + if (scraper == null) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_NOT_FOUND); + } + return Optional.of(scraper); } } \ No newline at end of file diff --git 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java index 7885ae3..e44980a 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java @@ -3,23 +3,28 @@ import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; -import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.util.HtmlScraperUtils; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.HtmlScraperUtils; +import lombok.extern.slf4j.Slf4j; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.springframework.stereotype.Component; -import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; /** * 동아일보 기사 본문 스크래퍼 구현체 * * @author 양병학 * @since 2025-05-13 최초 작성 + * @modified 2025-05-17 동아일보 스포츠 기사 스크래핑 기능 추가 */ +@Slf4j @Component public class DongaContentScraper implements ContentScraper { @@ -28,95 +33,213 @@ public class DongaContentScraper implements ContentScraper { * * @param url 기사 URL * @return 문단 단위로 문단 텍스트 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 */ @Override - public List scrapeParagraphs(String url) { + public List 
scrapeParagraphs(String url) throws ArticleCollectorException { Document document = connectToUrl(url); + List content = extractContent(url, document); + validateContent(content); + return content; + } + + /** + * URL과 문서에 따라 적절한 콘텐츠 추출 메서드를 호출 + * + * @param url 기사 URL + * @param document 파싱된 JSoup Document + * @return 추출된 문단 리스트 + * @throws ArticleCollectorException 콘텐츠 추출 중 오류 발생 시 + */ + private List extractContent(String url, Document document) throws ArticleCollectorException { + if (isSportsArticle(url, document)) { + return extractDongaSportsContent(document); + } return extractDongaContent(document); } /** - * 동아일보 본문 추출 (section.news_view에서 h2, figure 제외) + * 추출된 콘텐츠의 유효성 검증 + * + * @param content 추출된 문단 리스트 + * @throws ArticleCollectorException 콘텐츠가 비어있거나 유효하지 않을 때 + */ + private void validateContent(List content) throws ArticleCollectorException { + if (content == null || content.isEmpty() || content.stream().allMatch(String::isEmpty)) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_CONTENT); + } + } + + /** + * URL 또는 문서 구조를 기반으로 스포츠 기사인지 확인 + * + * @param url 기사 URL + * @param document 파싱된 JSoup Document + * @return 스포츠 기사 여부 + */ + private boolean isSportsArticle(String url, Document document) { + if (url.contains("sports.donga.com") || url.contains("/sports/")) { + return true; + } + + Element articleWord = document.selectFirst("div.article_word#article_body"); + return articleWord != null; + } + + /** + * 동아일보 일반 기사 본문 추출 (section.news_view에서 h2, figure 제외) * * @param document JSoup Document * @return 문단 리스트 + * @throws ArticleCollectorException 본문 파싱 중 오류 발생 시 */ - private List extractDongaContent(Document document) { - Element newsView = HtmlScraperUtils.findElement(document, "section.news_view"); - if (null == newsView) { - return new ArrayList<>(); + private List extractDongaContent(Document document) throws ArticleCollectorException { + try { + Element newsView = findNewsViewElement(document); + if (null == 
newsView) { + return new ArrayList<>(); + } + + // HTML 처리 및 불필요한 태그 제거 + Element processedView = processHtmlElement(newsView); + + // 전체 텍스트 추출 + String fullText = processedView.text(); + + // 문단 추출 + List paragraphs = extractParagraphsFromText(fullText); + + // 문단이 없는 경우 전체 텍스트를 하나의 문단으로 처리 + if (paragraphs.isEmpty() && !fullText.trim().isEmpty()) { + paragraphs.add(fullText.trim()); + } + + return paragraphs; + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_PARSING_ERROR, e); } + } - Element processedView = HtmlScraperUtils.removeTags(newsView, "h2", "figure", "img"); + /** + * 문서에서 뉴스 본문 영역 찾기 + * + * @param document JSoup Document + * @return 뉴스 본문 Element + */ + private Element findNewsViewElement(Document document) { + return HtmlScraperUtils.findElement(document, "section.news_view"); + } - String fullText = processedView.text(); - List paragraphs = extractParagraphsByQuotes(fullText); + /** + * HTML 요소 처리 -
태그를 문단 구분자로 변환하고 불필요한 태그 제거 + * + * @param element 처리할 HTML 요소 + * @return 처리된 HTML 요소 + */ + private Element processHtmlElement(Element element) { + String html = element.html(); + html = html.replaceAll("", "PARAGRAPH_BREAK"); + Element parsedElement = Jsoup.parse(html).body(); - return paragraphs; + return HtmlScraperUtils.removeTags(parsedElement, "h2", "figure", "img"); } /** - * 큰따옴표 기준으로 문단 추출 + * 텍스트에서 문단 추출 * * @param text 전체 텍스트 * @return 문단 리스트 */ - private List extractParagraphsByQuotes(String text) { - List paragraphs = new ArrayList<>(); + private List extractParagraphsFromText(String text) { + String[] paragraphsArray = text.split("PARAGRAPH_BREAK"); - String[] parts = text.split("\""); + return Arrays.stream(paragraphsArray) + .map(String::trim) + .filter(p -> !p.isEmpty()) + .collect(Collectors.toList()); + } - for (int i = 1; i < parts.length; i += 2) { - String paragraph = parts[i].trim(); - if (!paragraph.isEmpty()) { - paragraphs.add(paragraph); - } + /** + * 동아일보 스포츠 기사 본문 추출 (div.article_word#article_body) + * + * @param document JSoup Document + * @return 문단 리스트 + * @throws ArticleCollectorException 본문 파싱 중 오류 발생 시 + */ + private List extractDongaSportsContent(Document document) throws ArticleCollectorException { + // 시도할 선택자들을 배열로 정의 + String[] selectors = { + "div.article_word#article_body", + "div.article_word" + }; + + Element articleBody = Arrays.stream(selectors) + .map(selector -> HtmlScraperUtils.findElement(document, selector)) + .filter(Objects::nonNull) + .findFirst() + .orElse(null); + + if (articleBody == null) { + return new ArrayList<>(); } - if (paragraphs.isEmpty() && !text.trim().isEmpty()) { - paragraphs.add(text.trim()); + try { + String[] selectorsToRemove = { + "div.photoAd", + "div.subcont_ad01", + "div.view_center", + "p.copyright" + }; + + Arrays.stream(selectorsToRemove) + .forEach(selector -> articleBody.select(selector).remove()); + + String fullText = processHtmlAndExtractText( + articleBody, "img", "script", 
"style"); + + return Arrays.stream(fullText.split("PARAGRAPH_BREAK")) + .map(String::trim) + .filter(p -> !p.isEmpty()) + .collect(Collectors.toList()); + + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_PARSING_ERROR, e); } + } + + private String processHtmlAndExtractText(Element element, String... tagsToRemove) { + String html = element.html(); + html = html.replaceAll("", "PARAGRAPH_BREAK"); + Element parsedElement = Jsoup.parse(html).body(); - return paragraphs; + Element processedElement = HtmlScraperUtils.removeTags(parsedElement, tagsToRemove); + + return processedElement.text(); } /** - * 동아일보 기사 URL에서 본문 내용을 텍스트로 스크래핑 + * 동아일보 기사 URL에서 본문 내용을 PARAGRAPH_BREAK로 구분된 문자열로 스크래핑 * * @param url 기사 URL - * @return 스크래핑된 본문 + * @return PARAGRAPH_BREAK로 구분된 본문 문자열 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 */ @Override - public String scrapeContent(String url) { + public String scrapeContent(String url) throws ArticleCollectorException { List paragraphs = scrapeParagraphs(url); - return String.join("\n\n", paragraphs); + return String.join("PARAGRAPH_BREAK", paragraphs); } /** - * 동아일보 기사 URL에서 이미지 URL을 스크래핑 + * 동아일보 RSS에서 이미지를 가져오므로 빈 문자열 반환 (구현 필요없음) * * @param url 기사 URL - * @return 스크래핑된 이미지 URL - * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 FEED_PARSING_ERROR 예외 발생 + * @return 빈 문자열 + * @throws ArticleCollectorException 사용되지 않음 */ @Override - public String scrapeImageUrl(String url) { - try { - Document document = Jsoup.connect(url).get(); - return extractImageUrlFromDocument(document); - } catch (IOException e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); - } - } - - /** - * Document에서 이미지 URL 추출 - * - * @param document 파싱된 JSoup Document - * @return 추출된 이미지 URL - */ - private String extractImageUrlFromDocument(Document document) { - return HtmlScraperUtils.extractImageUrl(document, "section.news_view figure img"); + public String scrapeImageUrl(String 
url) throws ArticleCollectorException { + return ""; } /** diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java index 48c1ac9..d2edba1 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java @@ -3,15 +3,18 @@ import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; -import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.util.HtmlScraperUtils; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.HtmlScraperUtils; +import lombok.extern.slf4j.Slf4j; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.springframework.stereotype.Component; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.Objects; /** * 경향신문 기사 본문 스크래퍼 구현체 @@ -19,6 +22,7 @@ * @author 양병학 * @since 2025-05-13 최초 작성 */ +@Slf4j @Component public class KhanContentScraper implements ContentScraper { @@ -27,16 +31,18 @@ public class KhanContentScraper implements ContentScraper { * * @param url 기사 URL * @return 문단 단위로 나눈 본문 리스트 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 */ @Override - public List scrapeParagraphs(String url) { - try { - Document document = 
connectToUrl(url); - List paragraphs = extractKhanContent(document); - return paragraphs; - } catch (Exception e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR, e); + public List scrapeParagraphs(String url) throws ArticleCollectorException { + Document document = connectToUrl(url); + List content = extractKhanContent(document); + + if (content == null || content.isEmpty() || content.stream().allMatch(String::isEmpty)) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_CONTENT); } + + return content; } /** @@ -44,27 +50,33 @@ public List scrapeParagraphs(String url) { * * @param document JSoup Document * @return 문단 리스트 + * @throws ArticleCollectorException 본문 파싱 중 오류 발생 시 */ - private List extractKhanContent(Document document) { - Element artBody = HtmlScraperUtils.findElement(document, "article.art_body"); - - if (artBody == null) { - artBody = HtmlScraperUtils.findElement(document, "div.art_body"); - } - - if (artBody == null) { - artBody = HtmlScraperUtils.findElement(document, "div.article_view"); - } - - if (artBody == null) { - artBody = HtmlScraperUtils.findElement(document, "div.article-body"); - } - - if (artBody == null) { - return new ArrayList<>(); + private List extractKhanContent(Document document) throws ArticleCollectorException { + try { + List selectors = List.of( + "article.art_body", + "div.art_body", + "div.article_view", + "div.article-body" + ); + // 가장 먼저 매칭되는 Element 하나만 찾기 + Element artBody = selectors.stream() + .map(sel -> HtmlScraperUtils.findElement(document, sel)) + .filter(Objects::nonNull) + .findFirst() + .orElse(null); + // 못 찾았으면 빈 리스트 반환 + if (artBody == null) { + return new ArrayList<>(); + } + // 찾았으면 실제 파싱 로직 호출 + return extractKhanContentFromElement(artBody); + } catch (Exception e) { + throw new ArticleCollectorException( + ArticleCollectorErrorCode.SCRAPER_PARSING_ERROR, e + ); } - - return extractKhanContentFromElement(artBody); } /** @@ -72,56 +84,71 @@ 
private List extractKhanContent(Document document) { * * @param artBody 기사 본문 요소 * @return 문단 리스트 + * @throws ArticleCollectorException 본문 요소 처리 중 오류 발생 시 */ - private List extractKhanContentFromElement(Element artBody) { - Element processedBody = HtmlScraperUtils.removeTags(artBody, "h3", "div.art_photo", "img"); - - processedBody.select("*").forEach(el -> { - el.removeAttr("align"); - el.removeAttr("vspace"); - el.removeAttr("hspace"); - el.removeAttr("style"); - el.removeAttr("width"); - el.removeAttr("height"); - }); - - Elements paragraphs = processedBody.select("p"); - - if (paragraphs.isEmpty()) { - paragraphs = processedBody.select("div.article_paragraph"); - } + private List extractKhanContentFromElement(Element artBody) throws ArticleCollectorException { + try { + Element processedBody = HtmlScraperUtils.removeTags(artBody, "h3", "div.art_photo", "img"); - if (paragraphs.isEmpty()) { - paragraphs = processedBody.select("span.article_text"); - } + processedBody.select("*").forEach(el -> { + el.removeAttr("align"); + el.removeAttr("vspace"); + el.removeAttr("hspace"); + el.removeAttr("style"); + el.removeAttr("width"); + el.removeAttr("height"); + }); + + Elements paragraphs = processedBody.select("p"); - if (paragraphs.isEmpty()) { - List fallback = new ArrayList<>(); - String fullText = processedBody.text().trim(); - if (!fullText.isEmpty()) { - fallback.add(fullText); + if (paragraphs.isEmpty()) { + paragraphs = processedBody.select("div.article_paragraph"); + } + + if (paragraphs.isEmpty()) { + paragraphs = processedBody.select("span.article_text"); } - return fallback; - } - List result = paragraphs.stream() - .map(Element::text) - .filter(text -> !text.trim().isEmpty()) - .toList(); + if (paragraphs.isEmpty()) { + List fallback = new ArrayList<>(); + String fullText = processedBody.text().trim(); + if (!fullText.isEmpty()) { + fallback.add(fullText); + } + return fallback; + } + + List result = new ArrayList<>(); + for (Element p : paragraphs) { + 
String text = p.text().trim(); + if (!text.isEmpty()) { + result.add(text); + } + } - return result; + return result; + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_PARSING_ERROR, e); + } } /** - * 경향신문 기사 URL에서 본문 내용을 텍스트로 스크래핑 + * 경향신문 기사 URL에서 본문 내용을 PARAGRAPH_BREAK로 구분된 문자열로 스크래핑 * * @param url 기사 URL - * @return 스크래핑된 본문 + * @return PARAGRAPH_BREAK로 구분된 본문 문자열 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 */ @Override - public String scrapeContent(String url) { - List paragraphs = scrapeParagraphs(url); - return String.join("\n\n", paragraphs); + public String scrapeContent(String url) throws ArticleCollectorException { + Document document = connectToUrl(url); + List paragraphs = extractKhanContent(document); + + if (paragraphs == null || paragraphs.isEmpty() || paragraphs.stream().allMatch(String::isEmpty)) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_CONTENT); + } + + return String.join("PARAGRAPH_BREAK", paragraphs); } /** @@ -129,63 +156,100 @@ public String scrapeContent(String url) { * * @param url 기사 URL * @return 스크래핑된 이미지 URL + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 */ @Override - public String scrapeImageUrl(String url) { + public String scrapeImageUrl(String url) throws ArticleCollectorException { + Document document = connectToUrl(url); + String imageUrl = extractImageUrlFromDocument(document); + + if (imageUrl == null || imageUrl.isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_IMAGE); + } + + return imageUrl; + } + + /** + * 문서에서 이미지 URL을 추출합니다. + * 여러 선택자를 순차적으로 시도하여 첫 번째로 발견된 유효한 이미지 URL을 반환합니다. 
+ * + * @param document 이미지를 추출할 JSoup Document + * @return 추출된 이미지 URL 또는 빈 문자열 + * @throws ArticleCollectorException 파싱 중 오류 발생 시 + * @since 2025-05-18 + * @author 양병학 + */ + private String extractImageUrlFromDocument(Document document) throws ArticleCollectorException { try { - Document document = connectToUrl(url); - String imageUrl = extractImageUrlFromDocument(document); - return imageUrl; + String metaImageUrl = extractMetaImageUrl(document); + if (!metaImageUrl.isEmpty()) { + return metaImageUrl; + } + + return extractImageUrlFromSelectors(document); } catch (Exception e) { - throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR, e); + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_PARSING_ERROR, e); } } /** - * Document에서 이미지 URL 추출 + * 메타 태그에서 이미지 URL을 추출합니다. * - * @param document 파싱된 JSoup Document - * @return 추출된 이미지 URL + * @param document 이미지를 추출할 JSoup Document + * @return 추출된 이미지 URL 또는 빈 문자열 */ - private String extractImageUrlFromDocument(Document document) { + private String extractMetaImageUrl(Document document) { Element metaImg = document.selectFirst("meta[property=og:image]"); if (metaImg != null && !metaImg.attr("content").isEmpty()) { return metaImg.attr("content"); } + return ""; + } - Element mainImg = document.selectFirst("picture img"); - if (mainImg != null && !mainImg.attr("src").isEmpty()) { - return mainImg.attr("abs:src"); - } - - Element source = document.selectFirst("picture source"); - if (source != null && !source.attr("srcset").isEmpty()) { - String srcset = source.attr("srcset"); - String[] sources = srcset.split(","); - if (sources.length > 0) { - String firstSource = sources[0].trim().split("\\s+")[0]; - return source.absUrl("srcset").isEmpty() ? 
firstSource : source.absUrl("srcset"); - } - } - - Element contentImg = document.selectFirst("article.art_body img"); - if (contentImg != null && !contentImg.attr("src").isEmpty()) { - return contentImg.attr("abs:src"); - } + /** + * 다양한 이미지 선택자를 시도하여 이미지 URL을 추출합니다. + * + * @param document 이미지를 추출할 JSoup Document + * @return 추출된 이미지 URL 또는 빈 문자열 + */ + private String extractImageUrlFromSelectors(Document document) { + List simpleSelectors = List.of( + "picture img", + "article.art_body img", + "div.art_photo img", + "figure img", + "img" + ); - Element imgContainer = document.selectFirst("div.art_photo img"); - if (imgContainer != null && !imgContainer.attr("src").isEmpty()) { - return imgContainer.attr("abs:src"); - } + return simpleSelectors.stream() + .map(document::selectFirst) + .filter(Objects::nonNull) + .filter(img -> !img.attr("src").isEmpty()) + .map(img -> img.attr("abs:src")) + .findFirst() + .orElseGet(() -> { + Element source = document.selectFirst("picture source"); + if (source != null && !source.attr("srcset").isEmpty()) { + return extractSourceSetImageUrl(source); + } + return ""; + }); + } - Element figureImg = document.selectFirst("figure img"); - if (figureImg != null && !figureImg.attr("src").isEmpty()) { - return figureImg.attr("abs:src"); - } + /** + * source 태그의 srcset 속성에서 이미지 URL을 추출합니다. + * + * @param source srcset 속성을 가진 source 요소 + * @return 추출된 이미지 URL 또는 빈 문자열 + */ + private String extractSourceSetImageUrl(Element source) { + String srcset = source.attr("srcset"); + String[] sources = srcset.split(","); - Element anyImg = document.selectFirst("img"); - if (anyImg != null && !anyImg.attr("src").isEmpty()) { - return anyImg.attr("abs:src"); + if (sources.length > 0) { + String firstSource = sources[0].trim().split("\\s+")[0]; + return source.absUrl("srcset").isEmpty() ? 
firstSource : source.absUrl("srcset"); } return ""; diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java new file mode 100644 index 0000000..1399ad0 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java @@ -0,0 +1,68 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** + * HTML 파싱 및 문단 추출을 처리하는 유틸리티 클래스 + * + * @author 양병학 + * @since 2025-05-18 + */ +public class HtmlParser { + + /** + * HTML 문자열에서 모든 태그를 제거하고 문단을 추출 + * + * @param html HTML 문자열 + * @return 정제된 문단 리스트 + */ + public List extractCleanParagraphs(String html) { + if (html == null || html.isEmpty()) { + return new ArrayList<>(); + } + + try { + String withBreaks = html.replaceAll("", "PARAGRAPH_BREAK"); + String noTags = withBreaks.replaceAll("<[^>]*>", ""); + String decoded = decodeHtmlEntities(noTags); + + decoded = decoded.replaceAll("\\s+", " ").trim(); + String[] paragraphs = decoded.split("PARAGRAPH_BREAK"); + + return Arrays.stream(paragraphs) + .map(String::trim) + .filter(p -> !p.isEmpty()) + .collect(Collectors.toList()); + } catch (Exception e) { + List fallback = new ArrayList<>(); + fallback.add(removeAllHtmlTags(html)); + return fallback; + } + } + + private String removeAllHtmlTags(String html) { + if (html == null || html.isEmpty()) { + return ""; + } + + String noTags = html.replaceAll("<[^>]*>", ""); + return decodeHtmlEntities(noTags).replaceAll("\\s+", " ").trim(); + } + + private String decodeHtmlEntities(String text) { + if (text == null || text.isEmpty()) { + return ""; + } + + return text.replace(" ", " ") + 
.replace(" ", " ") + .replace("<", "<") + .replace(">", ">") + .replace("&", "&") + .replace(""", "\"") + .replace("'", "'"); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/util/HtmlScraperUtils.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlScraperUtils.java similarity index 99% rename from src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/util/HtmlScraperUtils.java rename to src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlScraperUtils.java index 009396d..4f10b0e 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/util/HtmlScraperUtils.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlScraperUtils.java @@ -1,4 +1,4 @@ -package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.util; +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java new file mode 100644 index 0000000..eb80cd2 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java @@ -0,0 +1,34 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util; + +import java.util.List; + +/** + * 문단 처리 유틸리티 클래스 + * + * @author 양병학 + * 
@since 2025-05-18 + */ +public class ParagraphUtil { + + /** + * 문단 리스트를 구분자로 연결된 문자열로 직렬화 + * + * @param paragraphs 문단 리스트 + * @return 직렬화된 문자열 + */ + public String serializeParagraphs(List paragraphs) { + if (paragraphs == null || paragraphs.isEmpty()) { + return ""; + } + + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < paragraphs.size(); i++) { + if (i > 0) { + sb.append("PARAGRAPH_BREAK"); + } + sb.append(paragraphs.get(i)); + } + + return sb.toString(); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java index e66ccc3..ad5c8d4 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java @@ -1,7 +1,11 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.writer; import java.util.List; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; import org.springframework.batch.item.Chunk; import org.springframework.batch.item.ItemWriter; @@ -14,15 +18,20 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import static java.util.Map.entry; + /** * 기사 데이터를 DB에 저장하는 Spring Batch ItemWriter 구현체. * 중복된 링크는 저장하지 않으며, 새롭게 저장된 기사 수를 로그로 출력한다. + * 매퍼에서 전달된 문단 구분자(PARAGRAPH_BREAK)를 기준으로 문단을 분리하고 직렬화한다. 
* * - 입력: 기사 리스트(List) - * - 처리: 중복 여부 확인 후 저장 + * - 처리: 문단 분리, 직렬화, 중복 여부 확인 후 저장 * - 출력: 로그 출력 (중복 제외) * * @since 2025-05-10 + * @modified 2025-05-15 직렬화 확인 로직 추가 + * @modified 2025-05-16 PARAGRAPH_BREAK 기반 문단 처리 추가 */ @Slf4j @Component @@ -30,6 +39,7 @@ public class ArticleWriter implements ItemWriter> { private final NewsInfoJpaRepository newsInfoJpaRepository; + private static final String PARAGRAPH_BREAK = "PARAGRAPH_BREAK"; /** * 기사 리스트를 저장하며, 중복된 기사는 건너뛴다. @@ -44,13 +54,138 @@ public void write(Chunk> chunk) { AtomicInteger savedCount = new AtomicInteger(); chunk.getItems().stream() .flatMap(List::stream) + .peek(this::processAndSerializeDescription) .filter(item -> !newsInfoJpaRepository.existsByLink(item.getLink())) .forEach(item -> {saveItem(item, savedCount);}); + log.info("새로 저장된 뉴스 개수: {}", savedCount.get()); } - /*** - * DB에 뉴스를 저장하고, 저장된 개수를 증가시킵니다. + /** + * 설명(description) 필드를 처리하고 JSON 형식으로 직렬화 + * 매퍼에서 전달된 PARAGRAPH_BREAK를 기준으로 문단을 분리하기 + * + * @param item 처리할 ArticleEntity 객체 + */ + private void processAndSerializeDescription(ArticleEntity item) { + String description = item.getDescription(); + if (description == null || description.isEmpty()) { + item.setDescription("[]"); + return; + } + + if (isAlreadyJsonFormat(description)) { + return; + } + + List paragraphs = splitIntoParagraphs(description); + String jsonDescription = serializeToJson(paragraphs); + item.setDescription(jsonDescription); + + } + + /** + * 문자열이 JSON 형식인지 확인 + * + * @param text 확인할 문자열 + * @return JSON 형식이면 true, 아니면 false + */ + private boolean isAlreadyJsonFormat(String text) { + return text.trim().startsWith("[") && text.trim().endsWith("]"); + } + + /** + * 텍스트를 PARAGRAPH_BREAK를 기준으로 문단으로 분리 + * + * @param text 분리할 텍스트 + * @return 분리된 문단 리스트 + */ + private List splitIntoParagraphs(String text) { + if (!text.contains(PARAGRAPH_BREAK)) { + log.warn("PARAGRAPH_BREAK 구분자가 없는 텍스트 감지: {}", + text.substring(0, Math.min(text.length(), 50)) + "..."); + return 
Arrays.asList(text); + } + + String[] paragraphArray = text.split(PARAGRAPH_BREAK); + List paragraphs = new ArrayList<>(); + + for (String paragraph : paragraphArray) { + String trimmed = paragraph.trim(); + if (!trimmed.isEmpty()) { + paragraphs.add(trimmed); + } + } + + if (paragraphs.isEmpty()) { + paragraphs.add(text); + } + + return paragraphs; + } + + /** + * 문단 리스트를 JSON으로 직렬화한다. + * + * @param paragraphs 직렬화할 문단 리스트 + * @return JSON 형식의 문자열 + */ + private String serializeToJson(List paragraphs) { + if (paragraphs == null || paragraphs.isEmpty()) { + return "[]"; + } + + StringBuilder sb = new StringBuilder("["); + for (int i = 0; i < paragraphs.size(); i++) { + String paragraph = paragraphs.get(i); + String escaped = escapeJsonString(paragraph); + sb.append("\"").append(escaped).append("\""); + if (i < paragraphs.size() - 1) { + sb.append(","); + } + } + sb.append("]"); + + return sb.toString(); + } + + private static final Map JSON_ESCAPES = Map.ofEntries( + entry('\"', "\\\""), + entry('\\', "\\\\"), + entry('/', "\\/"), + entry('\b', "\\b"), + entry('\f', "\\f"), + entry('\n', "\\n"), + entry('\r', "\\r"), + entry('\t', "\\t") + ); + + /** + * JSON 문자열 이스케이프 처리 + * + * @param input 이스케이프할 문자열 + * @return 이스케이프된 문자열 + */ + protected String escapeJsonString(String input) { + if (input == null) { + return ""; + } + return input.chars() + .mapToObj(cp -> { + char c = (char) cp; + if (JSON_ESCAPES.containsKey(c)) { + return JSON_ESCAPES.get(c); + } + if (cp < 0x20) { + return String.format("\\u%04x", cp); + } + return String.valueOf(c); + }) + .collect(Collectors.joining()); + } + + /** + * DB에 뉴스를 저장하고, 저장된 개수를 증가시킨다. 
* * @param item 저장할 뉴스 * @param savedCount 저장된 갯수 diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java index 19b3e16..b3d1c42 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java @@ -4,15 +4,15 @@ import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.FloatArrayToJsonConverter; -import jakarta.persistence.Column; -import jakarta.persistence.Convert; -import jakarta.persistence.Entity; -import jakarta.persistence.GeneratedValue; -import jakarta.persistence.GenerationType; -import jakarta.persistence.Id; -import jakarta.persistence.PrePersist; -import jakarta.persistence.Table; -import jakarta.persistence.UniqueConstraint; +import jakarta.persistence.*; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.PastOrPresent; +import jakarta.validation.constraints.Pattern; +import jakarta.validation.constraints.Size; + +import org.hibernate.validator.constraints.URL; + import lombok.AllArgsConstructor; import lombok.Builder; import lombok.EqualsAndHashCode; @@ -45,38 +45,52 @@ public class ArticleEntity { @GeneratedValue(strategy = GenerationType.IDENTITY) private Long id; - @Setter - @Column(nullable = false) - private String title; - - @Column(nullable = false, unique = true) - private String link; - - @Setter - @Column(name = "pub_date") - private LocalDateTime pubDate; - - @Column - private String category; - - @Column - private String guid; - - @Setter - @Column(columnDefinition = "TEXT") - private String description; - - @Setter - @Column(name = "summary", columnDefinition = 
"TEXT") - private String summary; - - @Setter - @Column(name = "image_url") - private String imageUrl; + @Setter + @Column(nullable = false) + @NotBlank(message = "제목은 필수 값입니다") + @Size(max = 500, message = "제목은 최대 500자까지 허용됩니다") + private String title; + + @Column(nullable = false, unique = true) + @NotBlank(message = "링크는 필수 값입니다") + @URL(message = "유효한 URL 형식이어야 합니다") + @Size(max = 255, message = "링크는 최대 255자까지 허용됩니다") + private String link; + + @Setter + @Column(name = "pub_date") + @NotNull(message = "발행일은 필수 값입니다") + @PastOrPresent(message = "발행일은 현재 또는 과거 날짜여야 합니다") + private LocalDateTime pubDate; + + @Column + @NotBlank(message = "카테고리는 필수 값입니다") + @Size(max =10, message = "카테고리는 최대 10자까지 허용됩니다") + private String category; + + @Column + @NotBlank(message = "GUID는 필수 값입니다") + @Size(max = 255, message = "GUID는 최대 255자까지 허용됩니다") + @Pattern(regexp = "^[A-Z]{2}\\d+$", message = "GUID는 2개의 대문자와 숫자로 구성되어야 합니다") // 예: KM12345 + private String guid; + + @Setter + @Column(columnDefinition = "TEXT") + private String description; + + @Setter + @Column(name = "image_url") + @Size(max = 1000, message = "이미지 URL은 최대 1000자까지 허용됩니다") + private String imageUrl; @Column(name = "created_at") private LocalDateTime createdAt; + @Setter + @Column(name = "summary", columnDefinition = "TEXT") + @Size(max = 1000, message = "요약은 최대 1000자까지 허용됩니다") + private String summary; + @Convert(converter = FloatArrayToJsonConverter.class) @Column(name = "summary_vector", columnDefinition = "JSON") private float[] summaryVector; @@ -94,8 +108,4 @@ protected void onCreate() { public String getDescription() { return description != null ? description : ""; } - - public String getSummary() { - return summary != null ? 
summary : ""; - } } \ No newline at end of file From d71717dbacd943eb4bb2935bf393db2f738b1aee Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Mon, 19 May 2025 11:00:22 +0900 Subject: [PATCH 33/36] =?UTF-8?q?=F0=9F=90=9B=20Fix:=20HtmlParser=20?= =?UTF-8?q?=EC=BB=B4=ED=8F=AC=EB=84=8C=ED=8A=B8=20=EB=93=B1=EB=A1=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../infrastructure/collector/support/util/HtmlParser.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java index 1399ad0..a5a6511 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java @@ -11,6 +11,7 @@ * @author 양병학 * @since 2025-05-18 */ +@Component public class HtmlParser { /** @@ -65,4 +66,4 @@ private String decodeHtmlEntities(String text) { .replace(""", "\"") .replace("'", "'"); } -} \ No newline at end of file +} From bd97c68ffaac6b0174332db8799638af1f551a9b Mon Sep 17 00:00:00 2001 From: "Yejeong, Ham" Date: Mon, 19 May 2025 11:02:43 +0900 Subject: [PATCH 34/36] =?UTF-8?q?=F0=9F=90=9B=20Fix:=20=EC=BB=B4=ED=8F=AC?= =?UTF-8?q?=EB=84=8C=ED=8A=B8=20=EC=96=B4=EB=85=B8=ED=85=8C=EC=9D=B4?= =?UTF-8?q?=EC=85=98=20=ED=81=B4=EB=9E=98=EC=8A=A4=20import=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../infrastructure/collector/support/util/HtmlParser.java | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java index a5a6511..91ba8b5 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java @@ -1,5 +1,7 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util; +import org.springframework.stereotype.Component; + import java.util.ArrayList; import java.util.Arrays; import java.util.List; From eabff413085d6e5c682708834514af2f94b24967 Mon Sep 17 00:00:00 2001 From: Atriel <118334518+Atriel1999@users.noreply.github.com> Date: Mon, 19 May 2025 11:22:19 +0900 Subject: [PATCH 35/36] =?UTF-8?q?=F0=9F=90=9B=20Fix:=20=EC=9C=A0=ED=8B=B8?= =?UTF-8?q?=EB=A6=AC=ED=8B=B0=20=EB=A9=94=EC=86=8C=EB=93=9C=EC=97=90=20@Co?= =?UTF-8?q?mponents=20=EC=96=B4=EB=85=B8=ED=85=8C=EC=9D=B4=EC=85=98=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80=20(#79)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../collector/support/util/HtmlScraperUtils.java | 2 ++ .../infrastructure/collector/support/util/ParagraphUtil.java | 3 +++ 2 files changed, 5 insertions(+) diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlScraperUtils.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlScraperUtils.java index 4f10b0e..95d077a 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlScraperUtils.java +++ 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlScraperUtils.java @@ -3,6 +3,7 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.springframework.stereotype.Component; import java.util.ArrayList; import java.util.List; @@ -14,6 +15,7 @@ * @author 양병학 * @since 2025-05-13 최초 작성 */ +@Component public class HtmlScraperUtils { /** diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java index eb80cd2..1233af4 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java @@ -1,5 +1,7 @@ package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util; +import org.springframework.stereotype.Component; + import java.util.List; /** @@ -8,6 +10,7 @@ * @author 양병학 * @since 2025-05-18 */ +@Component public class ParagraphUtil { /** From 01ea56245d374b533aa009869216c34729549feb Mon Sep 17 00:00:00 2001 From: JUNG ANSIK Date: Tue, 20 May 2025 10:59:32 +0900 Subject: [PATCH 36/36] =?UTF-8?q?=E2=9C=A8=C2=A0=20Feature:=20#78=20?= =?UTF-8?q?=EB=89=B4=EC=8A=A4=20=EC=9A=94=EC=95=BD=20=EB=B0=8F=20=EB=B2=A1?= =?UTF-8?q?=ED=84=B0=20=ED=95=84=EB=93=9C=20=EC=B6=94=EA=B0=80=20(#80)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨  Feature/#58-뉴스-데이터-Elasticsearch-저장 * ♻️ Refactor: Error핸들 로직 추가 * ♻️ Refactor: sample코드 제거 및 주석 수정 * 📝 Docs: 주석을 올바르게 수정하였습니다. 
* ♻️ Refactor: yml 수정 --- .gitignore | 1 + .../batch/index/domain/model/NewsInfo.java | 6 -- .../adapter/ElasticsearchNewsInfoAdapter.java | 40 ++++++++----- .../adapter/document/NewsInfoDocument.java | 8 ++- .../mapper/NewsInfoDocumentMapper.java | 7 ++- .../jpa/adapter/NewsInfoProviderAdapter.java | 1 - .../index/NewsIndexServiceTestImpl.java | 59 ------------------- .../NewsInfoProviderPortSample.java | 31 ---------- 8 files changed, 39 insertions(+), 114 deletions(-) delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java delete mode 100644 src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java diff --git a/.gitignore b/.gitignore index a2ba527..64bad6c 100644 --- a/.gitignore +++ b/.gitignore @@ -97,6 +97,7 @@ $RECYCLE.BIN/ *.ear *.zip *.tar.gz +*.gz *.rar *.hprof hs_err_pid* diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java index 386c90b..0b0894c 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java @@ -6,12 +6,6 @@ import lombok.Getter; import lombok.RequiredArgsConstructor; -/** - * 뉴스 정보를 표현하는 도메인 모델 - * - * @since 2025-05-15 - * @modified 2025-05-18 - */ @RequiredArgsConstructor @Getter @Builder diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java index 60db859..c722cf2 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java +++ 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java @@ -27,6 +27,7 @@ * Spring Data Elasticsearch를 이용해 뉴스 정보를 Bulk 색인하고 저장된 개수를 반환하는 어댑터 * * @since 2025-05-15 + * @modified 2025-05-19 */ @Component public class ElasticsearchNewsInfoAdapter implements NewsInfoIndexRepositoryPort { @@ -98,27 +99,36 @@ private void ensureIndexExists(IndexOperations ops) { * @return 매핑 프로퍼티 맵 * @author 정안식 * @since 2025-05-15 + * @modified 2025-05-19 + * 25-05-19 - summary 및 summary_vector 필드 추가 */ private Map mappingProperties() { - return Map.of( - NewsInfoDocument.FIELD_ID, Map.of("type", "keyword"), - NewsInfoDocument.FIELD_TITLE, Map.of( + return Map.ofEntries( + Map.entry(NewsInfoDocument.FIELD_ID, Map.of( + "type", "keyword")), + Map.entry(NewsInfoDocument.FIELD_TITLE, Map.of( "type", "text", "analyzer", NewsInfoDocument.ANALYZER_NORI, - "fields", Map.of( - NewsInfoDocument.FIELD_KEYWORD, Map.of("type", "keyword") - ) - ), - NewsInfoDocument.FIELD_CONTENT, Map.of( + "fields", Map.of(NewsInfoDocument.FIELD_KEYWORD, Map.of("type", "keyword")))), + Map.entry(NewsInfoDocument.FIELD_CONTENT, Map.of( "type", "text", "analyzer", NewsInfoDocument.ANALYZER_NORI, - "fields", Map.of( - NewsInfoDocument.FIELD_KEYWORD, Map.of("type", "keyword") - ) - ), - NewsInfoDocument.FIELD_PUBLISHED_AT, Map.of("type", "date"), - NewsInfoDocument.FIELD_IMAGE_URL, Map.of("type", "keyword"), - NewsInfoDocument.FIELD_CATEGORY, Map.of("type", "keyword") + "fields", Map.of(NewsInfoDocument.FIELD_KEYWORD, Map.of("type", "keyword")))), + Map.entry(NewsInfoDocument.FIELD_PUBLISHED_AT, Map.of( + "type", "date")), + Map.entry(NewsInfoDocument.FIELD_IMAGE_URL, Map.of( + "type", "keyword")), + Map.entry(NewsInfoDocument.FIELD_CATEGORY, Map.of( + "type", "keyword")), + Map.entry(NewsInfoDocument.FIELD_SUMMARY, Map.of( + "type", "text", + "analyzer", NewsInfoDocument.ANALYZER_NORI, + "fields", Map.of(NewsInfoDocument.FIELD_KEYWORD, Map.of("type", 
"keyword")))), + Map.entry(NewsInfoDocument.FIELD_SUMMARY_VECTOR, Map.of( + "type", "dense_vector", + "dims", 1536, + "index", true, + "similarity", "cosine")) ); } diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java index 02319a5..9ba5ec1 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java @@ -8,7 +8,9 @@ /** * Elasticsearch에 저장될 뉴스 정보 문서 모델 클래스 * - * @since 2025-05-15 + * @since 2025-05-19 + * @modified 2025-05-19 + * 25-05-19 - summary, summaryVector 필드 추가 */ @Getter @AllArgsConstructor @@ -19,6 +21,8 @@ public class NewsInfoDocument { private final LocalDateTime publishedAt; private final String imageUrl; private final String category; + private final String summary; + private final float[] summaryVector; public static final String FIELD_ID = "newsId"; public static final String FIELD_TITLE = "title"; @@ -26,6 +30,8 @@ public class NewsInfoDocument { public static final String FIELD_PUBLISHED_AT = "publishedAt"; public static final String FIELD_IMAGE_URL = "imageUrl"; public static final String FIELD_CATEGORY = "category"; + public static final String FIELD_SUMMARY = "summary"; + public static final String FIELD_SUMMARY_VECTOR = "summaryVector"; public static final String ANALYZER_NORI = "nori"; public static final String FIELD_KEYWORD = "keyword"; diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java index 9ae013a..1bcbb1b 100644 --- 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java @@ -9,6 +9,7 @@ * 도메인 모델 NewsInfo를 Elasticsearch 문서 모델로 변환하는 매퍼 * * @since 2025-05-15 + * @modified 2025-05-19 */ @Component public class NewsInfoDocumentMapper { @@ -19,6 +20,8 @@ public class NewsInfoDocumentMapper { * @return 변환된 문서 객체 * @author 정안식 * @since 2025-05-15 + * @modified 2025-05-19 + * 25-05-19 - summary, summaryVector 필드 추가 */ public NewsInfoDocument toDocument(NewsInfo news) { return new NewsInfoDocument( @@ -27,7 +30,9 @@ public NewsInfoDocument toDocument(NewsInfo news) { news.getContent(), news.getPublishedAt(), news.getImageUrl(), - news.getCategory() + news.getCategory(), + news.getSummary(), + news.getSummaryVector() ); } } diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java index d4c1113..b3b3a03 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java @@ -24,7 +24,6 @@ */ @Component @RequiredArgsConstructor -@Primary public class NewsInfoProviderAdapter implements NewsInfoProviderPort { private static final int MAX_NEWS_COUNT = 100; private final NewsInfoJpaRepository newsInfoJpaRepository; diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java deleted file mode 100644 index 08ecac2..0000000 --- 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/NewsIndexServiceTestImpl.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.sample.index; - -import java.time.LocalDateTime; -import java.util.List; - -import org.springframework.stereotype.Component; - -import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; -import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogMethodValues; -import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; -import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; -import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; - -import lombok.extern.slf4j.Slf4j; - -@Slf4j -@Component -public class NewsIndexServiceTestImpl implements NewsInfoProviderPort{ - - @EntryExitLog - @TimeTracker - @LogMethodValues - @Override - public List fetchAll() { - return List.of( - new NewsInfo( - "news-1", - "테스트 뉴스 1", - "첫 번째 테스트 뉴스의 내용입니다.", - LocalDateTime.of(2025, 5, 14, 10, 0), - "https://example.com/image1.jpg", - "테스트", - "요약1", - new float[] {1.0f, 2.5f, 3.3f} - ), - new NewsInfo( - "news-2", - "테스트 뉴스 2", - "두 번째 테스트 뉴스의 내용입니다.", - LocalDateTime.of(2025, 5, 13, 11, 30), - "https://example.com/image2.jpg", - "테스트", - "요약2", - new float[] {1.0f, 2.5f, 3.3f} - ), - new NewsInfo( - "news-3", - "테스트 뉴스 3", - "세 번째 테스트 뉴스의 내용입니다.", - LocalDateTime.of(2025, 5, 12, 14, 45), - "https://example.com/image3.jpg", - "테스트", - "요약3", - new float[] {1.0f, 2.5f, 3.3f} - ) - ); - } -} - diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java deleted file mode 100644 index 23270cf..0000000 --- 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/index/providerPort/NewsInfoProviderPortSample.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.likelion.backendplus4.talkpick.batch.sample.index.providerPort; - -import static com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse.*; - -import java.util.List; - -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RestController; - -import com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse; -import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; -import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; - -import lombok.RequiredArgsConstructor; - -@RestController -@RequiredArgsConstructor -@RequestMapping("/sample/news/info") -public class NewsInfoProviderPortSample { - private final NewsInfoProviderPort newsInfoProviderPort; - - /** - * 실제 사용시에는 Response 객체로 변환 필요 - */ - @GetMapping - public ResponseEntity>> fetchAll() { - return success(newsInfoProviderPort.fetchAll()); - } -}