diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index c2b4247..0000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,6 +0,0 @@ -blank_issues_enabled: false - -contact_links: - - name: 아이디어 토론·Q&A는 여기로 - url: https://github.com/talkpick/talkpick-batch/discussions - about: 아이디어 토론·Q&A는 Discussions 탭을 이용해 주세요. diff --git a/.github/ISSUE_TEMPLATE/epic.yml b/.github/ISSUE_TEMPLATE/epic.yml deleted file mode 100644 index 96ab208..0000000 --- a/.github/ISSUE_TEMPLATE/epic.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: "✨ Epic" -description: 큰 기능(에픽) 카드 – 하위 사용자 스토리를 모으는 부모 이슈 -title: "[Epic] " -labels: [epic] -assignees: [] - -body: - - type: textarea - id: goal - attributes: - label: 🎯 목표(Problem / Goal) - description: "이 에픽이 해결하려는 문제 또는 달성할 목표를 간략히 서술하세요." - placeholder: | - 예) “외부 인증 없이도 원‑클릭 로그인 기능을 제공하여, 신규 유저 유입 장벽을 낮춘다.” - validations: - required: true - - - type: textarea - id: background - attributes: - label: 📝 배경 / 맥락 - description: 설계 근거, 관련 문서·링크 등이 있으면 적어 주세요. - placeholder: | - - Figma UX 흐름: https://figma.com/… - - OAuth 2.0 Sequence 다이어그램 - validations: - required: false - - - type: textarea - id: scope - attributes: - label: 📋 범위 – 예상 하위 작업 체크리스트 - description: "*하위 Issue 를 추가하면 자동으로 갱신되므로 초안만 작성해도 됩니다.*" - placeholder: | - - [ ] UI 버튼 디자인 - - [ ] OAuth 동의 화면 호출 - - [ ] 신규 사용자 DB 저장 - render: markdown - validations: - required: false - - - type: textarea - id: dod - attributes: - label: ✅ 완료 기준(Definition of Done) - description: "에픽이 ‘Done’ 으로 이동되기 위한 최소 조건을 적으세요." - placeholder: | - - 모든 하위 Issue 완료 - - Dev/Staging 배포 확인 - - 릴리스 노트 작성 - validations: - required: true - - - type: markdown - attributes: - value: | - 🔗 **하위 이슈 연결 방법** - - 새 이슈를 만들 때 본문 맨 위에 `parent: #<이 Epic 번호>`를 쓰거나 - - Projects 보드에서 “Add child issue” 버튼을 누르세요. 
\ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature.yml b/.github/ISSUE_TEMPLATE/feature.yml deleted file mode 100644 index 9f46135..0000000 --- a/.github/ISSUE_TEMPLATE/feature.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: "✨ Feature" -description: "새로운 기능 추가" -labels: ["✨ Feature"] -projects: ["yakplus/1"] -body: - - type: textarea - attributes: - label: 📄 설명 - description: 새로운 기능에 대한 설명을 작성해 주세요. - placeholder: 자세히 적을수록 좋습니다! - validations: - required: true - - type: textarea - attributes: - label: ✅ 작업할 내용 - description: 할 일을 체크박스 형태로 작성해주세요. - placeholder: | - 🫧 - - [ ] <체크 1> - - [ ] <체크 2> - value: | - - [ ] <작업 1> - - [ ] <작업 2> - - [ ] <작업 3> - - [ ] 더입력 ... - validations: - required: true - - type: textarea - attributes: - label: 🙋🏻 참고 자료 - description: 참고 자료가 있다면 작성해 주세요. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/tech-debt.yml b/.github/ISSUE_TEMPLATE/tech-debt.yml deleted file mode 100644 index a6add2d..0000000 --- a/.github/ISSUE_TEMPLATE/tech-debt.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: "🛠️ Tech Debt / Refactor" -description: 리팩터링·성능 개선·보안 패치 등 기술 부채 상환 이슈 -title: "[Tech Debt] " -labels: [tech-debt] -assignees: [] - -body: - - type: textarea - id: area - attributes: - label: 📍 개선 대상(모듈·파일·클래스) - placeholder: | - 예) NewsController 의 300라인 이상 메소드 분리 - validations: - required: true - - - type: textarea - id: problem - attributes: - label: 🔎 현재 문제점 - description: "왜 부채가 되었는지, 어떤 위험·불편이 있는지 작성해 주세요." - placeholder: | - - 메소드 길이가 200라인을 넘어 가독성이 떨어집니다. - - 동일 로직이 Service 레이어에도 중복되어 유지보수가 어렵습니다. - validations: - required: true - - - type: textarea - id: proposal - attributes: - label: 💡 개선 방안(선택) - placeholder: | - - 메소드 분리 + 공통 로직 Utility 이동 - - 단위 테스트 추가 - validations: - required: false - - - type: dropdown - id: priority - attributes: - label: ⏰ 우선순위 - description: "해결 시기를 가늠할 수 있도록 심각도를 선택해 주세요." 
- options: - - high (즉시 상환: 다음 스프린트 포함) - - medium (2~3 스프린트 내) - - low (시간 날 때) - validations: - required: true - - - type: checkboxes - id: impact - attributes: - label: 기대 효과(복수 선택) - options: - - label: 읽기 쉬운 코드 - - label: 성능 개선 - - label: 버그 위험 감소 - - label: 보안 강화 - - label: 테스트 용이성 향상 - - label: 기타 (추가 건의) \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/user-story.yml b/.github/ISSUE_TEMPLATE/user-story.yml deleted file mode 100644 index 3e22402..0000000 --- a/.github/ISSUE_TEMPLATE/user-story.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: "🗂️ 사용자 스토리" -description: 개발 작업 카드 (INVEST 형식) -title: "" -labels: [] -assignees: [] - -body: - - type: textarea - id: story - attributes: - label: 사용자 스토리 - description: "형식: **[역할]**로서 **[목표]**를 하고 싶다. 그 결과 **[이점]**을 얻는다." - placeholder: | - 예) 방문자로서 키워드로 뉴스를 검색하고 싶다. 그 결과 관련 기사를 빠르게 찾을 수 있다. - validations: - required: true - - - type: textarea - id: ac - attributes: - label: 수락 기준(Acceptance Criteria) - description: "동작 완료 조건 2~5개 (가능하면 Given‑When‑Then)." - placeholder: | - - [ ] 검색 결과는 0.5초 이내 반환된다 - - [ ] 결과가 없으면 '결과가 없습니다' 토스트가 표시된다 - validations: - required: true - - - type: checkboxes - id: dor - attributes: - label: Definition of Ready 체크 - options: - - label: 설명·목표가 명확하다 - - label: 수락 기준이 작성되었다 \ No newline at end of file diff --git a/.github/PULL_REQUEST_TEMPLATE/bugfix.md b/.github/PULL_REQUEST_TEMPLATE/bugfix.md deleted file mode 100644 index 66ef3a1..0000000 --- a/.github/PULL_REQUEST_TEMPLATE/bugfix.md +++ /dev/null @@ -1,46 +0,0 @@ - - -## 🐞 버그 설명 - - -## 🔍 원인(Root Cause) - - -## 🛠️ 수정(Implemented Changes) - - -## ✅ 테스트 방법 -1. [ ] `/login` 페이지 접근 -2. [ ] ‘GitHub로 로그인’ 클릭 → OAuth 동의 → 메인 화면 이동 -3. [ ] 토큰 만료 후 재시도 시 로그인 페이지로 정상 리다이렉트 - -> **CI 통합 테스트**도 포함되어야 합니다. 
- -## 📸 스크린샷 / 동영상(선택) - - ---- - -### 체크리스트 -- [ ] 관련 이슈 링크: `Fixes #` -- [ ] 단위 / 통합 테스트 통과 -- [ ] 코드리뷰 2인 승인 예정 -- [ ] Dev 환경 배포 확인 - - diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index 17374f2..0000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1,31 +0,0 @@ -## 📌 PR 유형 (해당하는 항목에 모두 체크해주세요) -- [ ] Feat: 새로운 기능 추가 -- [ ] Fix: 버그 수정 -- [ ] Docs: 문서 수정 -- [ ] Style: 코드 포맷팅, 세미콜론 누락, 코드 변경이 없는 경우 -- [ ] Refactor: 코드 리팩토링 (기능 변경 없이 구조 개선) -- [ ] Test: 테스트 코드 추가 및 기존 테스트 리팩토링 -- [ ] Chore: 빌드 설정, 패키지 매니저 설정 등 기타 변경 -- [ ] Github: PR 템플릿, 이슈 템플릿, Github Actions 설정 등 -- [ ] Conflict: 머지 시 충돌 해결 - - -## ✨ 변경 사항 -- 이 PR에서 어떤 작업을 했는지 요약해주세요. -- 주요 변경 사항, 기능, 개선 내용을 자세히 작성해주세요. - - -## 🔍 리뷰어에게 -- 리뷰어가 집중해서 봐야 할 포인트가 있다면 알려주세요. -- 추가 설명이 필요한 부분이 있다면 작성해주세요. - - -## ✅ PR 체크리스트 -- [ ] 커밋 메시지를 컨벤션에 맞게 작성했습니다. -- [ ] 변경 사항을 로컬에서 테스트했습니다. -- [ ] 관련 라벨을 선택했습니다. - - -## 🔗 관련 이슈 -- 이 PR과 연결된 이슈가 있다면 번호를 적어주세요. -- `closed #이슈번호` 형태로 적으면 머지 시 자동으로 이슈가 닫힙니다. 
\ No newline at end of file
diff --git a/.github/workflows/batch-dev-deploy.yml b/.github/workflows/batch-dev-deploy.yml
new file mode 100644
index 0000000..f3e11f9
--- /dev/null
+++ b/.github/workflows/batch-dev-deploy.yml
@@ -0,0 +1,56 @@
+name: talkpick-batch-dev-deploy
+
+# Builds the Spring Boot JAR on the self-hosted runner and recreates the batch container.
+on:
+  push:
+    branches:
+      - dev
+    paths-ignore:
+      - '.github/**'
+  workflow_dispatch:
+
+# The job only reads the repository; drop every other GITHUB_TOKEN scope.
+permissions:
+  contents: read
+
+# Every run writes to the same /deploy paths, so runs must not overlap.
+concurrency:
+  group: talkpick-batch-dev-deploy
+  cancel-in-progress: false
+
+jobs:
+  deploy:
+    runs-on: self-hosted
+    env:
+      # Expanded by the shell at run time instead of being spliced into the script text.
+      DEPLOY_DIR: /deploy/${{ github.event.repository.name }}/build
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Java
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin'
+          java-version: '21'
+
+      - name: Set custom build directory dynamically
+        run: |
+          # The leading newline keeps the key on its own line when the file has no trailing newline.
+          printf '\nbuildDir=%s/gradle\n' "$DEPLOY_DIR" >> gradle.properties
+
+      - name: Make gradlew executable
+        run: chmod +x ./gradlew
+
+      - name: Build Spring Boot (JAR)
+        run: ./gradlew bootJar
+
+      - name: Copy JAR to shared volume
+        run: |
+          cp "$DEPLOY_DIR"/gradle/libs/*.jar "$DEPLOY_DIR/app.jar"
+
+      - name: Restart Docker
+        run: |
+          cd /deploy
+          docker-compose up -d --build --force-recreate batch
diff --git a/.github/workflows/d-day-labeler.yml b/.github/workflows/d-day-labeler.yml
new file mode 100644
index 0000000..5046ba1
--- /dev/null
+++ b/.github/workflows/d-day-labeler.yml
@@ -0,0 +1,28 @@
+name: D-day-labeler
+
+on:
+  schedule:
+    # GitHub evaluates cron expressions in UTC: this fires at 15:00 UTC every day.
+    - cron: '0 15 * * *'
+
+permissions:
+  issues: write
+  pull-requests: write
+  contents: read
+
+jobs:
+  issue-d-day-labeler:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Update D-n Labels
+        uses: yj-circle/issue-d-day-labeler@v1.0.1
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+  pr-d-day-labeler:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Update D-n Labels
+        uses: naver/d-day-labeler@latest
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 5c0b59b..64bad6c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,37 +1,114 @@
-HELP.md
+# ==============================
+# Build and
Gradle +# ============================== .gradle build/ +**/build/ +!src/**/build/ + +# Gradle wrapper (the jar is re-included after the *.jar rule below, because the last matching pattern wins) -!gradle/wrapper/gradle-wrapper.jar -!**/src/main/**/build/ -!**/src/test/**/build/ -*.env -### STS ### -.apt_generated +!gradle-wrapper.properties + +# ============================== +# IDE settings +# ============================== + +# IntelliJ IDEA +.idea +*.iml +*.iws +*.ipr +/out/ +.idea/**/ +.idea_modules/ +.idea/httpRequests +.idea/codestream.xml + +# Eclipse / STS +.apt_generated/ +.apt_generated_test/ .classpath -.factorypath .project -.settings +.settings/ +.factorypath .springBeans .sts4-cache -bin/ -!**/src/main/**/bin/ -!**/src/test/**/bin/ +.externalToolBuilders/ +*.launch +*.pydevproject +.cproject +.autotools +.buildpath +.loadpath +.recommenders/ +.cache-main +.scala_dependencies +.worksheet -### IntelliJ IDEA ### -.idea -*.iws -*.iml -*.ipr -out/ -!**/src/main/**/out/ -!**/src/test/**/out/ - -### NetBeans ### -/nbproject/private/ -/nbbuild/ -/dist/ -/nbdist/ -/.nb-gradle/ - -### VS Code ### +# VS Code .vscode/ +.history/ +.ionide/ + +# ============================== +# Development files +# ============================== +*.env +*.http + +# ============================== +# OS / system files +# ============================== +.DS_Store +.AppleDouble +.LSOverride +Icon +._* +.Spotlight-V100 +.Trashes +.fseventsd +.com.apple.timemachine.donotpresent +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk +*.icloud + +# Windows +Thumbs.db +ehthumbs.db +Thumbs.db:encryptable +ehthumbs_vista.db +*.stackdump +*.lnk +Desktop.ini +$RECYCLE.BIN/ + +# ============================== +# Java +# ============================== +*.class +*.log +*.jar +!gradle/wrapper/gradle-wrapper.jar +*.war +*.nar +*.ear +*.zip +*.tar.gz +*.gz +*.rar +*.hprof +hs_err_pid* +replay_pid* + +# ============================== +# Other backup / temporary files +# ============================== +*.tmp +*.bak +*.swp +*~.nib +*~ +~$* \ No newline at end of file diff --git a/build.gradle b/build.gradle index 8993aee..170ffb5 100644 
--- a/build.gradle +++ b/build.gradle @@ -27,10 +27,12 @@ dependencies { // Spring-boot implementation 'org.springframework.boot:spring-boot-starter-web' - implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-xml' + + implementation 'me.paulschwarz:spring-dotenv:3.0.0' testImplementation 'org.springframework.boot:spring-boot-starter-test' testRuntimeOnly 'org.junit.platform:junit-platform-launcher' + implementation 'org.springframework.boot:spring-boot-starter-validation' // Spring-batch implementation 'org.springframework.boot:spring-boot-starter-batch' @@ -48,12 +50,20 @@ dependencies { implementation 'org.springframework.boot:spring-boot-starter-data-elasticsearch' implementation 'org.elasticsearch.client:elasticsearch-rest-high-level-client:7.17.10' - // OpenAi + //OpenAi implementation 'org.springframework.ai:spring-ai-openai-spring-boot-starter:1.0.0-M5' //Swagger implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.7.0' + //P6spy + implementation "com.github.gavlyukovskiy:p6spy-spring-boot-starter:1.9.0" + + //RSS + implementation 'org.springframework.boot:spring-boot-starter-quartz' + implementation 'com.rometools:rome:1.18.0' + implementation 'org.jsoup:jsoup:1.20.1' + } tasks.named('test') { diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/TalkpickBatchApplication.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/TalkpickBatchApplication.java index d9dec07..5a2a49a 100644 --- a/src/main/java/com/likelion/backendplus4/talkpick/batch/TalkpickBatchApplication.java +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/TalkpickBatchApplication.java @@ -9,5 +9,4 @@ public class TalkpickBatchApplication { public static void main(String[] args) { SpringApplication.run(TalkpickBatchApplication.class, args); } - } diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/EntryExitLog.java 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/EntryExitLog.java
new file mode 100644
index 0000000..b6326a1
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/EntryExitLog.java
@@ -0,0 +1,25 @@
+package com.likelion.backendplus4.talkpick.batch.common.annotation.logging;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Marks a method whose entry and exit should be logged.
+ *
+ * @since 2025-05-10
+ */
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+@Documented
+public @interface EntryExitLog {
+    /**
+     * Log level for the generated messages: "debug" (any letter case) logs at DEBUG, every other value at INFO.
+     *
+     * @return log level name, e.g. "debug" or "info"
+     * @since 2025-05-10
+     */
+    String logLevel() default "info";
+}
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogJson.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogJson.java
new file mode 100644
index 0000000..e7cd758
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogJson.java
@@ -0,0 +1,25 @@
+package com.likelion.backendplus4.talkpick.batch.common.annotation.logging;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Marks a method whose arguments and return value should be logged as JSON.
+ *
+ * @since 2025-05-10
+ */
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+@Documented
+public @interface LogJson {
+    /**
+     * Log level for the generated messages: "debug" (any letter case) logs at DEBUG, every other value at INFO.
+     *
+     * @return log level name, e.g. "debug" or "info"
+     * @since 2025-05-10
+     */
+    String logLevel() default "info";
+}
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogMethodValues.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogMethodValues.java
new file mode 100644
index 0000000..6ae5f26
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/LogMethodValues.java
@@ -0,0 +1,25 @@
+package com.likelion.backendplus4.talkpick.batch.common.annotation.logging;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Marks a method whose arguments and return value should be logged.
+ *
+ * @since 2025-05-10
+ */
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+@Documented
+public @interface LogMethodValues {
+    /**
+     * Log level for the generated messages: "debug" (any letter case) logs at DEBUG, every other value at INFO.
+     *
+     * @return log level name, e.g. "debug" or "info"
+     * @since 2025-05-10
+     */
+    String logLevel() default "info";
+}
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/TimeTracker.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/TimeTracker.java
new file mode 100644
index 0000000..ddec90f
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/annotation/logging/TimeTracker.java
@@ -0,0 +1,25 @@
+package com.likelion.backendplus4.talkpick.batch.common.annotation.logging;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Marks a method whose execution time should be logged.
+ *
+ * @since 2025-05-10
+ */
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+@Documented
+public @interface TimeTracker {
+    /**
+     * Log level for the generated messages: "debug" (any letter case) logs at DEBUG, every other value at INFO.
+     *
+     * @return log level name, e.g. "debug" or "info"
+     * @since 2025-05-10
+     */
+    String logLevel() default "info";
+}
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/EntryExitLogAspect.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/EntryExitLogAspect.java
new file mode 100644
index 0000000..6aab7c3
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/EntryExitLogAspect.java
@@ -0,0 +1,63 @@
+package com.likelion.backendplus4.talkpick.batch.common.aop.logging;
+
+import org.aspectj.lang.ProceedingJoinPoint;
+import org.aspectj.lang.annotation.Around;
+import org.aspectj.lang.annotation.Aspect;
+import org.springframework.core.annotation.Order;
+import org.springframework.stereotype.Component;
+
+import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog;
+
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * EntryExitLog 애노테이션이 적용된 메서드의 진입과 종료 시점을 로그로 기록하는
AOP 클래스 + * + * @since 2025-05-10 + */ +@Aspect +@Component +@Slf4j +@Order(1) +public class EntryExitLogAspect { + + /** + * EntryExitLog 애노테이션이 적용된 메서드를 감싸서 시작 전과 종료 후에 로그를 출력한다. + * + * @param pjp 실행 중인 JoinPoint + * @param entryExitLog EntryExitLog 애노테이션 정보 + * @return 메서드 실행 결과 객체 + * @throws Throwable 메서드 실행 중 발생한 예외 + * @author 정안식 + * @since 2025-05-10 + */ + @Around("@annotation(entryExitLog)") + public Object logAround(ProceedingJoinPoint pjp, EntryExitLog entryExitLog) throws Throwable { + String className = pjp.getTarget().getClass().getSimpleName(); + String method = pjp.getSignature().toShortString(); + String logLevel = entryExitLog.logLevel().toLowerCase(); + log(logLevel, "{}-{} 메서드 시작", className, method); + + Object result = pjp.proceed(); + + log(logLevel, "{}-{} 메서드 종료", className, method); + return result; + } + + /** + * 지정된 로그 레벨에 따라 메시지를 출력한다. + * + * @param logLevel 로그 레벨 (debug 또는 info) + * @param format 출력할 메시지 포맷 + * @param args 포맷에 전달할 인자 + * @author 정안식 + * @since 2025-05-10 + */ + private void log(String logLevel, String format, Object... 
args) { + if ("debug".equals(logLevel)) { + log.debug(format, args); + } else { + log.info(format, args); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogJsonAspect.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogJsonAspect.java new file mode 100644 index 0000000..d007d30 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogJsonAspect.java @@ -0,0 +1,90 @@ +package com.likelion.backendplus4.talkpick.batch.common.aop.logging; + +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogJson; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * LogJson 애노테이션이 적용된 메서드의 입력값과 반환값을 JSON 형식으로 변환하여 로그로 기록하는 AOP 클래스 + * + * @since 2025-05-10 + */ +@Aspect +@Component +@Slf4j +@RequiredArgsConstructor +@Order(3) +public class LogJsonAspect { + + private final ObjectMapper objectMapper; + + /** + * LogJson 애노테이션이 적용된 메서드의 입력값과 반환값을 JSON 문자열로 변환하여 로그로 출력한다. 
+ * + * @param pjp 실행 중인 JoinPoint + * @param logJson LogJson 애노테이션 정보 + * @return 메서드 실행 결과 + * @throws Throwable 실행 중 발생한 예외 + * @author 정안식 + * @since 2025-05-10 + */ + @Around("@annotation(logJson)") + public Object logJson(ProceedingJoinPoint pjp, LogJson logJson) throws Throwable { + String className = pjp.getTarget().getClass().getSimpleName(); + String method = pjp.getSignature().toShortString(); + String logLevel = logJson.logLevel().toLowerCase(); + + logJsonSafely(logLevel, "{}-{} 메서드 [JSON 입력값] - {}", className, method, pjp.getArgs()); + + Object result = pjp.proceed(); + + logJsonSafely(logLevel, "{}-{} 메서드 [JSON 반환값] - {}", className, method, result); + + return result; + } + + /** + * JSON 변환에 실패하지 않도록 안전하게 로그를 출력한다. + * + * @param logLevel 로그 레벨 (debug 또는 info) + * @param format 로그에 출력할 메시지 포맷 + * @param className 클래스 이름 문자열 + * @param method 메서드 정보 문자열 + * @param target 변환 대상 객체 + * @author 정안식 + * @since 2025-05-10 + */ + private void logJsonSafely(String logLevel, String format, String className, String method, Object target) { + try { + String json = objectMapper.writeValueAsString(target); + log(logLevel, format, className, method, json); + } catch (Exception e) { + log.warn("{}-{} 메서드 JSON 변환 실패", className, method, e); + } + } + + /** + * 지정된 로그 레벨에 따라 메시지를 출력한다. + * + * @param logLevel 로그 레벨 (debug 또는 info) + * @param format 출력할 메시지 포맷 + * @param args 포맷에 전달할 인자 + * @author 정안식 + * @since 2025-05-10 + */ + private void log(String logLevel, String format, Object... 
args) { + if ("debug".equals(logLevel)) { + log.debug(format, args); + } else { + log.info(format, args); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogMethodValuesAspect.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogMethodValuesAspect.java new file mode 100644 index 0000000..60750cc --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/LogMethodValuesAspect.java @@ -0,0 +1,66 @@ +package com.likelion.backendplus4.talkpick.batch.common.aop.logging; + +import java.util.Arrays; + +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogMethodValues; + +import lombok.extern.slf4j.Slf4j; + +/** + * LogMethodValues 애노테이션이 적용된 메서드의 인자와 반환값을 로그로 기록하는 AOP 클래스 + * + * @since 2025-05-10 + */ +@Aspect +@Component +@Slf4j +@Order(1) +public class LogMethodValuesAspect { + + /** + * LogMethodValues 애노테이션이 적용된 메서드의 인자와 반환값을 로그로 출력한다. + * + * @param pjp 실행 중인 JoinPoint + * @param logMethodValues LogMethodValues 애노테이션 정보 + * @return 메서드 실행 결과 + * @throws Throwable 실행 중 발생한 예외 + * @author 정안식 + * @since 2025-05-10 + */ + @Around("@annotation(logMethodValues)") + public Object logArgsAndReturn(ProceedingJoinPoint pjp, LogMethodValues logMethodValues) throws Throwable { + String className = pjp.getTarget().getClass().getSimpleName(); + String method = pjp.getSignature().toShortString(); + String logLevel = logMethodValues.logLevel().toLowerCase(); + + log(logLevel, "{}-{}메서드[ARGS] - {}", className, method, Arrays.toString(pjp.getArgs())); + + Object result = pjp.proceed(); + + log(logLevel, "{}-{}메서드[RETURN] - {}", className, method, result); + return result; + } + + /** + * 지정된 로그 레벨에 따라 메시지를 출력한다. 
+ * + * @param logLevel 로그 레벨 (debug 또는 info) + * @param format 출력할 메시지 포맷 + * @param args 포맷에 전달할 인자 + * @author 정안식 + * @since 2025-05-10 + */ + private void log(String logLevel, String format, Object... args) { + if ("debug".equals(logLevel)) { + log.debug(format, args); + } else { + log.info(format, args); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/TimeTrackerAspect.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/TimeTrackerAspect.java new file mode 100644 index 0000000..1482610 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/aop/logging/TimeTrackerAspect.java @@ -0,0 +1,67 @@ +package com.likelion.backendplus4.talkpick.batch.common.aop.logging; + +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; + +import lombok.extern.slf4j.Slf4j; + +/** + * TimeTracker 애노테이션이 적용된 메서드의 실행 시간을 측정하여 로그로 남기는 AOP 클래스 + * + * @since 2025-05-10 + */ +@Aspect +@Component +@Slf4j +@Order(2) +public class TimeTrackerAspect { + + /** + * TimeTracker 애노테이션이 적용된 메서드를 감싸서 실행 시간을 기록하고 로그를 출력한다. 
+ * + * @param pjp 실행 중인 JoinPoint + * @param timeTracker TimeTracker 애노테이션 정보 + * @return 메서드 실행 결과 + * @throws Throwable 실행 중 발생한 예외 + * @author 정안식 + * @since 2025-05-10 + */ + @Around("@annotation(timeTracker)") + public Object trackTime(ProceedingJoinPoint pjp, TimeTracker timeTracker) throws Throwable { + String method = pjp.getSignature().toShortString(); + String logLevel = timeTracker.logLevel().toLowerCase(); + + long start = System.currentTimeMillis(); + + Object result = pjp.proceed(); + + long elapsedMillis = System.currentTimeMillis() - start; + double elapsedSeconds = elapsedMillis / 1000.0; + String formatted = String.format("%.3f", elapsedSeconds); + + log(logLevel, "{} 실행 시간 = {} 초", method, formatted); + return result; + } + + /** + * 지정된 로그 레벨에 따라 메시지를 출력한다. + * + * @param logLevel 로그 레벨 (debug 또는 info) + * @param format 출력할 메시지 포맷 + * @param args 포맷에 전달할 인자 + * @author 정안식 + * @since 2025-05-10 + */ + private void log(String logLevel, String format, Object... args) { + if ("debug".equals(logLevel)) { + log.debug(format, args); + } else { + log.info(format, args); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/WebConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/WebConfig.java new file mode 100644 index 0000000..24b5c5c --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/WebConfig.java @@ -0,0 +1,36 @@ +package com.likelion.backendplus4.talkpick.batch.common.configuration; + +import org.springframework.context.annotation.Configuration; +import org.springframework.web.servlet.config.annotation.InterceptorRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +import com.likelion.backendplus4.talkpick.batch.common.interceptor.logging.LogInterceptor; + +import lombok.RequiredArgsConstructor; + +/** + * 인터셉터를 등록하는 Web MVC 설정 클래스 + * + * @since 2025-05-10 + */ +@Configuration 
+@RequiredArgsConstructor +public class WebConfig implements WebMvcConfigurer { + + private static final String ALL_PATTERN = "/**"; + + private final LogInterceptor logInterceptor; + + /** + * LogInterceptor를 모든 경로에 등록한다. + * + * @param registry InterceptorRegistry 인터셉터 레지스트리 + * @author 정안식 + * @since 2025-05-10 + */ + @Override + public void addInterceptors(InterceptorRegistry registry) { + registry.addInterceptor(logInterceptor) + .addPathPatterns(ALL_PATTERN); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/elasticsearch/ElasticsearchConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/elasticsearch/ElasticsearchConfig.java new file mode 100644 index 0000000..8688c24 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/elasticsearch/ElasticsearchConfig.java @@ -0,0 +1,7 @@ +package com.likelion.backendplus4.talkpick.batch.common.configuration.elasticsearch; + +import org.springframework.context.annotation.Configuration; + +@Configuration +public class ElasticsearchConfig { +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/logging/LogbackConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/logging/LogbackConfig.java new file mode 100644 index 0000000..a2f033f --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/logging/LogbackConfig.java @@ -0,0 +1,178 @@ +package com.likelion.backendplus4.talkpick.batch.common.configuration.logging; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Configuration; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.LoggerContext; 
+import ch.qos.logback.classic.encoder.PatternLayoutEncoder; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.ConsoleAppender; +import ch.qos.logback.core.FileAppender; +import ch.qos.logback.core.rolling.TimeBasedRollingPolicy; +import ch.qos.logback.core.util.FileSize; +import jakarta.annotation.PostConstruct; + +/** + * Logback 설정을 위한 구성 클래스. + * application.properties의 log.rolling.* 설정에 따라 + * 콘솔 및 파일 appender를 생성하고 루트 로거에 등록한다. + * + * @since 2025-05-10 + */ +@Configuration +public class LogbackConfig { + @Value("${log.rolling.directory}") + private String LOG_DIRECTORY; + @Value("${log.rolling.file-name}") + private String LOG_FILE_NAME; + @Value("${log.rolling.pattern}") + private String LOG_PATTERN; + @Value("${log.rolling.max-history}") + private int MAX_HISTORY; + @Value("${log.rolling.total-size-cap}") + private String TOTAL_SIZE_CAP; + + /** + * 로그 설정을 초기화하고 콘솔 및 파일 appender를 구성한다. + * + * @author 정안식 + * @since 2025-05-10 + */ + @PostConstruct + public void configure() { + LoggerContext context = initializeLoggerContext(); + createLogDirectory(); + + ConsoleAppender consoleAppender = createConsoleAppender(context); + FileAppender fileAppender = createFileAppender(context); + + configureRootLogger(context, consoleAppender, fileAppender); + } + + /** + * LoggerContext를 초기화하고 리셋하여 반환한다. + * + * @return 초기화된 LoggerContext 객체 + * @author 정안식 + * @since 2025-05-10 + */ + private LoggerContext initializeLoggerContext() { + LoggerContext context = (LoggerContext)LoggerFactory.getILoggerFactory(); + context.reset(); + return context; + } + + /** + * 로그 디렉토리를 생성한다. 존재하지 않을 경우 새로 생성한다. + * + * @author 정안식 + * @since 2025-05-10 + */ + private void createLogDirectory() { + Path logPath = Paths.get(LOG_DIRECTORY); + try { + if (!Files.exists(logPath)) { + Files.createDirectories(logPath); + } + } catch (Exception e) { + throw new RuntimeException("로그 디렉토리 생성 실패", e); + } + } + + /** + * 콘솔 appender를 생성하여 반환한다. 
+ * + * @param context LoggerContext 객체 + * @return 생성된 ConsoleAppender + * @author 정안식 + * @since 2025-05-10 + */ + private ConsoleAppender createConsoleAppender(LoggerContext context) { + ConsoleAppender appender = new ConsoleAppender<>(); + appender.setContext(context); + appender.setEncoder(createEncoder(context)); + appender.start(); + return appender; + } + + /** + * 파일 appender를 생성하여 반환한다. + * + * @param context LoggerContext 객체 + * @return 생성된 FileAppender + * @since 2025-05-10 + */ + private FileAppender createFileAppender(LoggerContext context) { + FileAppender appender = new FileAppender<>(); + appender.setContext(context); + appender.setFile(LOG_DIRECTORY + "/" + LOG_FILE_NAME); + appender.setAppend(true); + appender.setEncoder(createEncoder(context)); + + TimeBasedRollingPolicy rollingPolicy = createRollingPolicy(context, appender); + rollingPolicy.start(); + + appender.start(); + return appender; + } + + /** + * PatternLayoutEncoder를 생성하여 반환한다. + * + * @param context LoggerContext 객체 + * @return 생성된 PatternLayoutEncoder + * @since 2025-05-10 + */ + private PatternLayoutEncoder createEncoder(LoggerContext context) { + PatternLayoutEncoder encoder = new PatternLayoutEncoder(); + encoder.setContext(context); + encoder.setPattern(LOG_PATTERN); + encoder.start(); + return encoder; + } + + /** + * 롤링 정책을 생성하여 반환한다. + * + * @param context LoggerContext 객체 + * @param parent 파일 appender + * @return 생성된 TimeBasedRollingPolicy + * @since 2025-05-10 + */ + private TimeBasedRollingPolicy createRollingPolicy(LoggerContext context, + FileAppender parent) { + TimeBasedRollingPolicy policy = new TimeBasedRollingPolicy<>(); + policy.setContext(context); + policy.setParent(parent); + policy.setFileNamePattern(LOG_DIRECTORY + "/" + LOG_FILE_NAME.replace(".log", ".%d{yyyy-MM-dd}.log")); + policy.setMaxHistory(MAX_HISTORY); + policy.setTotalSizeCap(FileSize.valueOf(TOTAL_SIZE_CAP)); + return policy; + } + + /** + * 루트 로거에 레벨 설정 및 appender를 등록한다. 
+ * + * @param context LoggerContext 객체 + * @param consoleAppender ConsoleAppender 객체 + * @param fileAppender FileAppender 객체 + * @since 2025-05-10 + */ + private void configureRootLogger(LoggerContext context, ConsoleAppender consoleAppender, + FileAppender fileAppender) { + Logger logger = LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME); + if (logger instanceof ch.qos.logback.classic.Logger) { + ch.qos.logback.classic.Logger rootLogger = (ch.qos.logback.classic.Logger)logger; + rootLogger.setLevel(Level.INFO); + rootLogger.addAppender(consoleAppender); + rootLogger.addAppender(fileAppender); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java new file mode 100644 index 0000000..f3a5f4d --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/openai/OpenaiConfig.java @@ -0,0 +1,60 @@ +package com.likelion.backendplus4.talkpick.batch.common.configuration.openai; + +import org.springframework.ai.chat.client.ChatClient; +import org.springframework.ai.openai.OpenAiChatModel; +import org.springframework.ai.openai.OpenAiChatOptions; +import org.springframework.ai.openai.api.OpenAiApi; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * OpenAI API 클라이언트 빈을 생성하는 구성 클래스 + * + * @since 2025-05-11 + */ +@Configuration +public class OpenaiConfig { + private final String apiKey; + private final String chatModelName; + private final Double temperature; + private final Integer maxToken; + + public OpenaiConfig( + @Value("${spring.ai.openai.api-key}") String apiKey, + @Value("${spring.ai.openai.summary.model}") String chatModelName, + @Value("${spring.ai.openai.summary.temperature}") Double temperature, + 
@Value("${spring.ai.openai.summary.maxCompletionTokens}") Integer maxToken) { + this.apiKey = apiKey; + this.chatModelName = chatModelName; + this.temperature = temperature; + this.maxToken = maxToken; + } + + /** + * OpenAiApi 빈을 생성한다. + * + * @return OpenAI API 클라이언트 인스턴스 + * @author 정안식 + * @since 2025-05-11 + */ + @Bean + public OpenAiApi openaiApi() { + return new OpenAiApi(apiKey); + } + + @Bean + public ChatClient chatClient(OpenAiChatModel chatModel) { + return ChatClient.create(chatModel); + } + + @Bean + public OpenAiChatModel chatModel(OpenAiApi openAiApi) { + OpenAiChatOptions options = OpenAiChatOptions.builder() + .model(chatModelName) + .temperature(temperature) + .maxCompletionTokens(maxToken) + .build(); + return new OpenAiChatModel(openAiApi, options); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/p6spy/P6spyConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/p6spy/P6spyConfig.java new file mode 100644 index 0000000..0e82519 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/configuration/p6spy/P6spyConfig.java @@ -0,0 +1,147 @@ +package com.likelion.backendplus4.talkpick.batch.common.configuration.p6spy; + +import com.p6spy.engine.logging.Category; +import com.p6spy.engine.spy.P6SpyOptions; +import com.p6spy.engine.spy.appender.MessageFormattingStrategy; +import jakarta.annotation.PostConstruct; +import java.util.Locale; +import org.hibernate.engine.jdbc.internal.FormatStyle; +import org.springframework.context.annotation.Configuration; + +/** + * P6Spy SQL 로깅 설정 클래스 + * P6Spy 의 MessageFormattingStrategy 를 구현하여, + * SQL 로그를 카테고리, 실행 시간, 포맷된 쿼리로 출력하도록 커스터마이징합니다. + * + * @since 2025-05-09 + * @modified 2025-05-09 + */ +@Configuration +public class P6spyConfig implements MessageFormattingStrategy { + + /** + * Spring 컨텍스트 초기화 직후 호출되어, P6Spy 옵션에 이 클래스의 포맷터를 사용하도록 등록합니다. 
+ * + * @author 박찬병 + * @modified 2025-05-09 + * @since 2025-05-09 + */ + @PostConstruct + public void setLogMessageFormat() { + // P6Spy 의 ActiveInstance 에 포맷터 클래스 이름을 지정 + P6SpyOptions.getActiveInstance() + .setLogMessageFormat(this.getClass().getName()); + } + + /** + * 실제 로그 메시지를 생성하는 엔트리 포인트 메서드. 카테고리에 따라 SQL 을 포맷팅하고, 실행 시간과 함께 출력합니다. + * + * @param connectionId 커넥션 고유 ID + * @param now 로그 출력 시각 (문자열) + * @param elapsed 쿼리 실행 경과 시간 (ms) + * @param category P6Spy 로깅 카테고리 (STATEMENT, RESULT, COMMIT 등) + * @param prepared PreparedStatement 템플릿 (파라미터 바인딩 전 SQL) + * @param sql 바인딩된 실제 SQL + * @param url 데이터소스 URL + * @return 카테고리, 실행 시간, 포맷된 SQL 을 포함한 로그 문자열 + * @author 박찬병 + * @modified 2025-05-09 + * @since 2025-05-09 + */ + @Override + public String formatMessage( + int connectionId, + String now, + long elapsed, + String category, + String prepared, + String sql, + String url + ) { + sql = formatSql(category, sql); + return String.format("[%s] | %d ms | %s", category, elapsed, sql); + } + + /** + * SQL 문을 읽기 좋게 포맷팅합니다. DDL 문(create/alter/comment)인 경우에는 FormatStyle.DDL, 그 외 쿼리는 + * FormatStyle.BASIC 스타일을 적용합니다. + * + * @param category P6Spy 로깅 카테고리 + * @param sql 실제 실행된 SQL + * @return 포맷팅된 SQL (또는 SQL 이 비어있으면 원본 반환) + * @author 박찬병 + * @modified 2025-05-09 + * @since 2025-05-10 + */ + private String formatSql(String category, String sql) { + if (isEmptySql(sql)) { + return sql; + } + + if (isStatementCategory(category)) { + return formatStatementSql(sql); + } + + return sql; + } + + + /** + * SQL이 비어있는지 확인합니다. + * + * @param sql 실행된 SQL 문자열 + * @return 비어있으면 true, 아니면 false + * @author 박찬병 + * @modified 2025-05-10 + * @since 2025-05-10 + */ + private boolean isEmptySql(String sql) { + return sql == null || sql.isBlank(); + } + + /** + * 주어진 카테고리가 STATEMENT 인지 여부를 판단합니다. 
+ * + * @param category P6Spy 로깅 카테고리 + * @return STATEMENT 카테고리이면 true, 아니면 false + * @author 박찬병 + * @modified 2025-05-10 + * @since 2025-05-10 + */ + private boolean isStatementCategory(String category) { + return Category.STATEMENT.getName().equals(category); + } + + /** + * STATEMENT 카테고리의 SQL을 포맷팅합니다. + * + * @param sql 실행된 SQL 문자열 + * @return 포맷팅된 SQL + * @author 박찬병 + * @modified 2025-05-09 + * @since 2025-05-10 + */ + private String formatStatementSql(String sql) { + if (isDdlStatement(sql)) { + return FormatStyle.DDL.getFormatter().format(sql); + } else { + return FormatStyle.BASIC.getFormatter().format(sql); + } + } + + /** + * 주어진 SQL 문이 DDL(create/alter/comment) 문인지 여부를 판단합니다. + * + * @param sql 실행된 SQL 문자열 + * @return DDL 문이면 true, 아니면 false + * @author 박찬병 + * @modified 2025-05-09 + * @since 2025-05-10 + */ + private boolean isDdlStatement(String sql) { + String trimmedSQL = sql.trim().toLowerCase(Locale.ROOT); + return trimmedSQL.startsWith("create") + || trimmedSQL.startsWith("alter") + || trimmedSQL.startsWith("comment"); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/decorator/logging/MdcTaskDecorator.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/decorator/logging/MdcTaskDecorator.java new file mode 100644 index 0000000..d958d51 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/decorator/logging/MdcTaskDecorator.java @@ -0,0 +1,41 @@ +package com.likelion.backendplus4.talkpick.batch.common.decorator.logging; + +import java.util.Map; + +import org.slf4j.MDC; +import org.springframework.core.task.TaskDecorator; +import org.springframework.stereotype.Component; + +/** + * 스레드 풀에서 실행되는 Task에 MDC(Context Map)를 전파하기 위한 TaskDecorator 구현체 + * MDC 정보를 부모 스레드에서 자식 스레드로 복사하여 로그 추적 정보를 유지하도록 한다. 
+ * + * @since 2025-05-10 + */ +@Component +public class MdcTaskDecorator implements TaskDecorator { + + /** + * Runnable 실행 시 부모 스레드의 MDC(Context Map)를 자식 스레드로 복사하여 설정한다. + * 실행 후 MDC를 반드시 clear하여 메모리 누수를 방지한다. + * + * @param runnable 실행할 원본 Runnable + * @return MDC context를 설정한 새로운 Runnable + * @author 정안식 + * @since 2025-05-10 + */ + @Override + public Runnable decorate(Runnable runnable) { + Map contextMap = MDC.getCopyOfContextMap(); + return () -> { + if (contextMap != null) { + MDC.setContextMap(contextMap); + } + try { + runnable.run(); + } finally { + MDC.clear(); + } + }; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/CustomException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/CustomException.java new file mode 100644 index 0000000..ee82829 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/CustomException.java @@ -0,0 +1,25 @@ +package com.likelion.backendplus4.talkpick.batch.common.exception; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +/** + * 사용자 정의 예외의 추상 클래스 애플리케이션 전역에서 사용하는 공통 예외 상위 타입이다. 
+ * + * @modified 2025-05-09 + * @since 2025-05-09 + */ +public abstract class CustomException extends RuntimeException { + + + // 메시지만 포함하는 기본 생성자 + public CustomException(ErrorCode errorCode) { + super(errorCode.message()); + } + + // 메시지 + 원인 예외 포함하는 생상자 + public CustomException(ErrorCode errorCode, Throwable cause) { + super(errorCode.message(), cause); + } + + public abstract ErrorCode getErrorCode(); +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/ErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/ErrorCode.java new file mode 100644 index 0000000..484d375 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/ErrorCode.java @@ -0,0 +1,61 @@ +package com.likelion.backendplus4.talkpick.batch.common.exception.error; + +import org.springframework.http.HttpStatus; + +/** + * 에러 코드 인터페이스 각 에러 항목에 대한 HTTP 상태, 에러 번호, 메시지를 제공한다. + * A[BB][CCC] + * A (1자리) : 에러 심각도 (1~5) + * 1: 클라이언트 오류 + * 2: 인증 관련 오류 + * 3: 사용자 관련 오류 + * 4: 서버 오류 + * 5: 시스템 오류 + * + * BB (2자리) : 도메인 코드 + * 10: 사용자 관련 (ex: USER_NOT_FOUND) + * 20: 인증 관련 (ex: AUTHORIZATION_FAILED) + * 30: DB 관련 오류 (ex: DB_CONNECTION_FAILED) + * 40: API 관련 오류 (ex: API_TIMEOUT) + * 50: 시스템 오류 (ex: INTERNAL_SERVER_ERROR) + * + * CCC (3자리) : 세부 오류 순번 + * 001: 첫 번째 오류 + * 002: 두 번째 오류 + * 003: 세 번째 오류, 등등 + * + * @modified 2025-05-09 + * @since 2025-05-09 + */ +public interface ErrorCode { + + /** + * HTTP 상태 반환 + * + * @return HTTP 상태 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + HttpStatus httpStatus(); + + /** + * 에러 코드 번호 반환 + * + * @return 에러 코드 번호 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + int codeNumber(); + + /** + * 에러 메시지 반환 + * + * @return 에러 메시지 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + String message(); +} diff --git 
a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/GlobalErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/GlobalErrorCode.java new file mode 100644 index 0000000..4571169 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/error/GlobalErrorCode.java @@ -0,0 +1,44 @@ +package com.likelion.backendplus4.talkpick.batch.common.exception.error; + +import lombok.AllArgsConstructor; +import lombok.Getter; + + +/** + * 에러 코드 인터페이스 각 에러 항목에 대한 HTTP 상태, 에러 번호, 메시지를 제공한다. + * A[BB][CCC] + * A (1자리) : 에러 심각도 (1~5) + * 1: 클라이언트 오류 + * 2: 인증 관련 오류 + * 3: 사용자 관련 오류 + * 4: 서버 오류 + * 5: 시스템 오류 + * + * BB (2자리) : 도메인 코드 + * 10: 사용자 관련 (ex: USER_NOT_FOUND) + * 20: 인증 관련 (ex: AUTHORIZATION_FAILED) + * 30: DB 관련 오류 (ex: DB_CONNECTION_FAILED) + * 40: API 관련 오류 (ex: API_TIMEOUT) + * 50: 시스템 오류 (ex: INTERNAL_SERVER_ERROR) + * + * CCC (3자리) : 세부 오류 순번 + * 001: 첫 번째 오류 + * 002: 두 번째 오류 + * 003: 세 번째 오류, 등등 + * + * @modified 2025-05-09 + * @since 2025-05-09 + */ +@AllArgsConstructor +@Getter +public enum GlobalErrorCode { + + ILLEGAL_ARGUMENT_CODE(14001), + NOT_FOUND_CODE(140002), + METHOD_ARGUMENT_NOT_VALID_CODE(300001), + BIND_EXCEPTION_CODE(300002), + INTERNAL_SERVER_ERROR_CODE(500000); + + private final int code; + +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/handler/GlobalExceptionHandler.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/handler/GlobalExceptionHandler.java new file mode 100644 index 0000000..28252c3 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/exception/handler/GlobalExceptionHandler.java @@ -0,0 +1,192 @@ +package com.likelion.backendplus4.talkpick.batch.common.exception.handler; + +import static com.likelion.backendplus4.talkpick.batch.common.exception.error.GlobalErrorCode.*; + +import 
com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; +import com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.validation.BindException; +import org.springframework.web.bind.MethodArgumentNotValidException; +import org.springframework.web.bind.annotation.ExceptionHandler; +import org.springframework.web.bind.annotation.RestControllerAdvice; +import org.springframework.web.servlet.NoHandlerFoundException; + +/** + * 전역 예외 처리 클래스 + * 컨트롤러에서 발생한 예외를 공통적으로 처리한다. + * + * @modified 2025-05-09 + * @since 2025-05-09 + */ +@Slf4j +@RestControllerAdvice +public class GlobalExceptionHandler { + + /** + * CustomException 처리 + * ErrorCode 인터페이스 기반으로 확장 가능한 방식으로 처리한다. + * + * @param ex CustomException 객체 + * @return 에러 응답 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + @ExceptionHandler(CustomException.class) + public ResponseEntity> handleCustomException(CustomException ex) { + ErrorCode errorCode = ex.getErrorCode(); + return buildErrorResponse( + errorCode.httpStatus(), + errorCode.codeNumber(), + errorCode.message(), + ex + ); + } + + /** + * IllegalArgumentException 처리 + * 잘못된 파라미터에 대한 예외 응답 처리 + * + * @param ex 예외 객체 + * @return 에러 응답 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + @ExceptionHandler(IllegalArgumentException.class) + public ResponseEntity> handleIllegalArgumentException(IllegalArgumentException ex) { + return buildErrorResponse( + HttpStatus.BAD_REQUEST, + ILLEGAL_ARGUMENT_CODE.getCode(), + ex.getMessage(), + ex + ); + } + + /** + * MethodArgumentNotValidException 처리 + * 유효성 검사 실패에 대한 응답 처리 + * + * @param ex 예외 객체 + * @return 에러 응답 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 
2025-05-09 + */ + @ExceptionHandler(MethodArgumentNotValidException.class) + public ResponseEntity> handleMethodArgumentNotValidException(MethodArgumentNotValidException ex) { + String errorMessage = getErrorMessage(ex); + return buildErrorResponse( + HttpStatus.BAD_REQUEST, + METHOD_ARGUMENT_NOT_VALID_CODE.getCode(), + errorMessage, + ex + ); + } + + /** + * BindException 처리 + * 폼 바인딩 유효성 실패 시 처리 + * + * @param ex BindException 오류 + * @return 에러 응답 + * @author 박찬병 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + @ExceptionHandler(BindException.class) + public ResponseEntity> handleBindException(BindException ex) { + String errorMessage = getErrorMessage(ex); + return buildErrorResponse( + HttpStatus.BAD_REQUEST, + BIND_EXCEPTION_CODE.getCode(), + errorMessage, + ex + ); + } + + /** + * NoHandlerFoundException 처리 메서드 + * + * 클라이언트가 존재하지 않는 URL 경로로 요청했을 때 발생하는 + * NoHandlerFoundException을 잡아 404 Not Found 응답을 반환합니다. + * + * @param ex 요청한 경로에 매핑된 핸들러가 없음을 나타내는 예외 + * @return HTTP 404 상태와 표준화된 에러 페이로드를 담은 ResponseEntity + * @author 박찬병 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + @ExceptionHandler(NoHandlerFoundException.class) + public ResponseEntity> handleNoHandler(NoHandlerFoundException ex) { + return buildErrorResponse( + HttpStatus.NOT_FOUND, + NOT_FOUND_CODE.getCode(), + "요청하신 경로를 찾을 수 없습니다.", + ex + ); + } + + + /** + * 기타 모든 예외 처리 + * 정의되지 않은 예외는 내부 서버 오류로 응답 + * + * @param ex 예외 객체 + * @return 에러 응답 + * @author 정안식 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + @ExceptionHandler(Exception.class) + public ResponseEntity> handleAllExceptions(Exception ex) { + return buildErrorResponse( + HttpStatus.INTERNAL_SERVER_ERROR, + INTERNAL_SERVER_ERROR_CODE.getCode(), + "알 수 없는 오류가 발생했습니다.", + ex + ); + } + + /** + * 공통 에러 응답 생성 메서드 + * 예외 로깅 후 ApiResponse.error를 통해 표준화된 에러 응답을 생성한다. 
+ * + * @param status HTTP 상태 코드 + * @param errorCode 에러 코드 (정수형) + * @param message 에러 메시지 + * @param ex 발생한 예외 객체 + * @return ResponseEntity> 형태의 에러 응답 + * @author 박찬병 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + private ResponseEntity> buildErrorResponse( + HttpStatus status, + int errorCode, + String message, + Throwable ex + ) { + log.error("{}: {}", ex.getClass().getSimpleName(), ex.getMessage(), ex); + return ApiResponse.error(status, String.valueOf(errorCode), message); + } + + /** + * BindingResult 분석 후 필드별 오류 메시지 조합 + * + * @param ex BindException 또는 MethodArgumentNotValidException 객체 + * @return 필드명과 메시지를 콤마로 연결한 오류 문자열 + * @author 박찬병 + * @modified 2025-05-09 박찬병 + * @since 2025-05-09 + */ + private String getErrorMessage(BindException ex) { + return ex.getBindingResult().getFieldErrors().stream() + .map(fe -> fe.getField() + ": " + fe.getDefaultMessage()) + .collect(Collectors.joining(", ")); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/interceptor/logging/LogInterceptor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/interceptor/logging/LogInterceptor.java new file mode 100644 index 0000000..cb5d680 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/interceptor/logging/LogInterceptor.java @@ -0,0 +1,89 @@ +package com.likelion.backendplus4.talkpick.batch.common.interceptor.logging; + +import java.util.UUID; + +import org.slf4j.MDC; +import org.springframework.stereotype.Component; +import org.springframework.web.servlet.HandlerInterceptor; + +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import lombok.extern.slf4j.Slf4j; + +/** + * 로깅을 위한 인터셉터 클래스 + * HTTP 요청 전후로 TraceId를 생성·설정·제거하여 로그 추적 정보를 지원한다. + * + * @since 2025-05-10 + */ +@Slf4j +@Component +public class LogInterceptor implements HandlerInterceptor { + + /** + * 요청 처리 전에 TraceId를 생성하고 MDC에 설정한다. 
+ * + * @param request HttpServletRequest 요청 객체 + * @param response HttpServletResponse 응답 객체 + * @param handler Object 핸들러 객체 + * @return boolean 처리 계속 여부 + * @author 정안식 + * @since 2025-05-10 + */ + @Override + public boolean preHandle(HttpServletRequest request, + HttpServletResponse response, Object handler) { + String traceId = generateTraceId(); + setTraceId(traceId); + log.info("TraceId 생성 성공 - " + traceId); + return true; + } + + /** + * 요청 처리 완료 후 MDC에 설정된 TraceId를 제거한다. + * + * @param request HttpServletRequest 요청 객체 + * @param response HttpServletResponse 응답 객체 + * @param handler Object 핸들러 객체 + * @param ex Exception 발생 예외 객체 + * @author 정안식 + * @since 2025-05-10 + */ + @Override + public void afterCompletion(HttpServletRequest request, + HttpServletResponse response, Object handler, Exception ex) { + clearTraceId(); + } + + /** + * 새로운 UUID 형식의 TraceId를 생성한다. + * + * @return String 생성된 TraceId 문자열 + * @author 정안식 + * @since 2025-05-10 + */ + private String generateTraceId() { + return UUID.randomUUID().toString(); + } + + /** + * 생성된 TraceId를 MDC에 설정한다. + * + * @param traceId String 설정할 TraceId + * @author 정안식 + * @since 2025-05-10 + */ + private void setTraceId(String traceId) { + MDC.put("traceId", traceId); + } + + /** + * MDC에 설정된 모든 정보를 제거하여 메모리 누수를 방지한다. 
+ * + * @author 정안식 + * @since 2025-05-10 + */ + private void clearTraceId() { + MDC.clear(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/common/response/ApiResponse.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/response/ApiResponse.java new file mode 100644 index 0000000..6d92a67 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/common/response/ApiResponse.java @@ -0,0 +1,57 @@ +package com.likelion.backendplus4.talkpick.batch.common.response; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; + +/** + * API 응답 포맷 클래스 정상 및 에러 응답을 통합된 형식으로 제공한다. + * + * @since 2025-05-09 + * @modified 2025-05-09 + */ +@Getter +@JsonInclude(JsonInclude.Include.NON_NULL) +@Builder +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor +public class ApiResponse { + + private static final String SUCCESS_MESSAGE = "요청 성공"; + + private String errorCode; + private String message; + private T data; + + public static ResponseEntity> success() { + ApiResponse body = ApiResponse.builder() + .message(SUCCESS_MESSAGE) + .build(); + return ResponseEntity.ok(body); + } + + + public static ResponseEntity> success(T data) { + ApiResponse body = ApiResponse.builder() + .message(SUCCESS_MESSAGE) + .data(data) + .build(); + return ResponseEntity.ok(body); + } + + + public static ResponseEntity> error(HttpStatus status, String errorCode, + String message) { + ApiResponse body = ApiResponse.builder() + .errorCode(errorCode) + .message(message) + .build(); + return ResponseEntity.status(status).body(body); + } + +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/in/NewsIndexUseCase.java 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/in/NewsIndexUseCase.java new file mode 100644 index 0000000..1c142b4 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/in/NewsIndexUseCase.java @@ -0,0 +1,5 @@ +package com.likelion.backendplus4.talkpick.batch.index.application.port.in; + +public interface NewsIndexUseCase { + int indexAllNewsInfo(); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoIndexRepositoryPort.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoIndexRepositoryPort.java new file mode 100644 index 0000000..110217c --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoIndexRepositoryPort.java @@ -0,0 +1,22 @@ +package com.likelion.backendplus4.talkpick.batch.index.application.port.out; + +import java.util.List; + +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; + +/** + * 뉴스 정보를 색인 저장소에 저장하는 포트 인터페이스 + * + * @since 2025-05-15 + */ +public interface NewsInfoIndexRepositoryPort { + /** + * 뉴스 정보 리스트를 색인 저장소에 저장한다. 
+ * + * @param newsList 저장할 뉴스 정보 리스트 + * @return 저장된 뉴스 정보 건수 + * @author 정안식 + * @since 2025-05-15 + */ + int saveAll(List newsList); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java new file mode 100644 index 0000000..ff64069 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/port/out/NewsInfoProviderPort.java @@ -0,0 +1,21 @@ +package com.likelion.backendplus4.talkpick.batch.index.application.port.out; + +import java.util.List; + +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; + +/** + * 외부 또는 내부에서 뉴스 정보를 조회하는 포트 인터페이스 + * + * @since 2025-05-15 + */ +public interface NewsInfoProviderPort { + /** + * 저장된 모든 뉴스 정보를 조회한다. + * + * @return 조회된 뉴스 정보 리스트 + * @author 정안식 + * @since 2025-05-15 + */ + List fetchAll(); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/service/NewsIndexService.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/service/NewsIndexService.java new file mode 100644 index 0000000..7c243a2 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/application/service/NewsIndexService.java @@ -0,0 +1,43 @@ +package com.likelion.backendplus4.talkpick.batch.index.application.service; + +import java.util.List; + +import org.elasticsearch.index.IndexService; +import org.springframework.stereotype.Service; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; +import com.likelion.backendplus4.talkpick.batch.index.application.port.in.NewsIndexUseCase; +import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoIndexRepositoryPort; +import 
com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * 뉴스 정보를 조회하고 색인 저장소에 전달하는 비즈니스 로직 서비스 + * + * @since 2025-05-15 + */ +@Slf4j +@RequiredArgsConstructor +@Service +public class NewsIndexService implements NewsIndexUseCase { + private final NewsInfoProviderPort newsInfoProviderPort; + private final NewsInfoIndexRepositoryPort newsInfoIndexRepositoryPort; + + /** + * 모든 뉴스 정보를 가져와 색인 저장소에 저장하고 저장된 건수를 반환한다. + * + * @return 색인된 뉴스 정보 건수 + * @author 정안식 + * @since 2025-05-15 + */ + @EntryExitLog() + @Override + public int indexAllNewsInfo() { + List newsInfoList = newsInfoProviderPort.fetchAll(); + return newsInfoIndexRepositoryPort.saveAll(newsInfoList); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java new file mode 100644 index 0000000..0b0894c --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/domain/model/NewsInfo.java @@ -0,0 +1,21 @@ +package com.likelion.backendplus4.talkpick.batch.index.domain.model; + +import java.time.LocalDateTime; + +import lombok.Builder; +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +@Getter +@Builder +public class NewsInfo { + private final String newsId; + private final String title; + private final String content; + private final LocalDateTime publishedAt; + private final String imageUrl; + private final String category; + private final String summary; + private final float[] summaryVector; +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java new file mode 100644 index 0000000..c722cf2 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/ElasticsearchNewsInfoAdapter.java @@ -0,0 +1,178 @@ +package com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.data.elasticsearch.core.ElasticsearchOperations; +import org.springframework.data.elasticsearch.core.IndexOperations; +import org.springframework.data.elasticsearch.core.IndexedObjectInformation; +import org.springframework.data.elasticsearch.core.RefreshPolicy; +import org.springframework.data.elasticsearch.core.document.Document; +import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates; +import org.springframework.data.elasticsearch.core.query.BulkOptions; +import org.springframework.data.elasticsearch.core.query.IndexQuery; +import org.springframework.data.elasticsearch.core.query.IndexQueryBuilder; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoIndexRepositoryPort; +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +import com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter.document.NewsInfoDocument; +import com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter.mapper.NewsInfoDocumentMapper; + +import jakarta.annotation.PostConstruct; + +/** + * Spring Data Elasticsearch를 이용해 뉴스 정보를 Bulk 색인하고 저장된 개수를 반환하는 어댑터 + * + * @since 2025-05-15 + * @modified 2025-05-19 + */ +@Component +public class ElasticsearchNewsInfoAdapter implements NewsInfoIndexRepositoryPort { + + private final ElasticsearchOperations esOperations; + private final 
NewsInfoDocumentMapper mapper; + private final String indexName; + private IndexOperations indexOperations; + + public ElasticsearchNewsInfoAdapter(ElasticsearchOperations esOperations, + NewsInfoDocumentMapper mapper, + @Value("${news.index.name}") String indexName) { + this.esOperations = esOperations; + this.mapper = mapper; + this.indexName = indexName; + } + + /** + * 초기화 단계에서 인덱스를 준비하고 존재하지 않으면 생성한다. + * + * @author 정안식 + * @since 2025-05-15 + */ + @PostConstruct + public void initIndex() { + this.indexOperations = esOperations.indexOps(IndexCoordinates.of(indexName)); + ensureIndexExists(this.indexOperations); + } + + /** + * 뉴스 정보 리스트를 Bulk 색인하고 색인된 개수를 반환한다. + * + * @param newsList 색인할 뉴스 정보 리스트 + * @return 색인된 객체 정보 리스트의 크기 + * @author 정안식 + * @since 2025-05-15 + */ + @Override + public int saveAll(List newsList) { + List queries = toIndexQueries(newsList); + List result = bulkIndex(indexOperations, queries); + + return result.size(); + } + + /** + * 인덱스가 없으면 생성하고 매핑을 설정한다. + * + * @param ops 인덱스 운영 객체 + * @author 정안식 + * @since 2025-05-15 + */ + private void ensureIndexExists(IndexOperations ops) { + try { + if (!ops.exists()) { + ops.create(); + ops.putMapping(Document + .create() + .append("properties", mappingProperties())); + } + } catch (Exception e) { + throw new RuntimeException("Failed to create or map index [" + indexName + "]", e); + } + } + + /** + * 문서 매핑에 사용할 Elasticsearch 프로퍼티 맵을 반환한다. 
+ * + * @return 매핑 프로퍼티 맵 + * @author 정안식 + * @since 2025-05-15 + * @modified 2025-05-19 + * 25-05-19 - summary 및 summary_vector 필드 추가 + */ + private Map mappingProperties() { + return Map.ofEntries( + Map.entry(NewsInfoDocument.FIELD_ID, Map.of( + "type", "keyword")), + Map.entry(NewsInfoDocument.FIELD_TITLE, Map.of( + "type", "text", + "analyzer", NewsInfoDocument.ANALYZER_NORI, + "fields", Map.of(NewsInfoDocument.FIELD_KEYWORD, Map.of("type", "keyword")))), + Map.entry(NewsInfoDocument.FIELD_CONTENT, Map.of( + "type", "text", + "analyzer", NewsInfoDocument.ANALYZER_NORI, + "fields", Map.of(NewsInfoDocument.FIELD_KEYWORD, Map.of("type", "keyword")))), + Map.entry(NewsInfoDocument.FIELD_PUBLISHED_AT, Map.of( + "type", "date")), + Map.entry(NewsInfoDocument.FIELD_IMAGE_URL, Map.of( + "type", "keyword")), + Map.entry(NewsInfoDocument.FIELD_CATEGORY, Map.of( + "type", "keyword")), + Map.entry(NewsInfoDocument.FIELD_SUMMARY, Map.of( + "type", "text", + "analyzer", NewsInfoDocument.ANALYZER_NORI, + "fields", Map.of(NewsInfoDocument.FIELD_KEYWORD, Map.of("type", "keyword")))), + Map.entry(NewsInfoDocument.FIELD_SUMMARY_VECTOR, Map.of( + "type", "dense_vector", + "dims", 1536, + "index", true, + "similarity", "cosine")) + ); + } + + /** + * 도메인 객체를 Elasticsearch 색인 쿼리로 변환한다. + * + * @param newsList 도메인 객체 리스트 + * @return 색인 쿼리 리스트 + * @author 정안식 + * @since 2025-05-15 + */ + private List toIndexQueries(List newsList) { + return newsList.stream() + .map(n -> new IndexQueryBuilder() + .withId(n.getNewsId()) + .withObject(mapper.toDocument(n)) + .build()) + .collect(Collectors.toList()); + } + + /** + * Bulk 옵션을 사용해 쿼리를 실행하고 결과 정보를 반환한다. 
+ * + * @param indexOperations 인덱스 운영 객체 + * @param queries 색인 쿼리 리스트 + * @return 색인 결과 객체 정보 리스트 + * @author 정안식 + * @since 2025-05-15 + */ + private List bulkIndex(IndexOperations indexOperations, + List queries) { + + try { + BulkOptions bulkOptions = BulkOptions.builder() + .withRefreshPolicy(RefreshPolicy.NONE) + .build(); + + return esOperations.bulkIndex( + queries, + bulkOptions, + indexOperations.getIndexCoordinates() + ); + } catch (Exception e) { + throw new RuntimeException("Failed to bulk index documents into [" + indexName + "]", e); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java new file mode 100644 index 0000000..9ba5ec1 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/document/NewsInfoDocument.java @@ -0,0 +1,39 @@ +package com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter.document; + +import java.time.LocalDateTime; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +/** + * Elasticsearch에 저장될 뉴스 정보 문서 모델 클래스 + * + * @since 2025-05-19 + * @modified 2025-05-19 + * 25-05-19 - summary, summaryVector 필드 추가 + */ +@Getter +@AllArgsConstructor +public class NewsInfoDocument { + private final String newsId; + private final String title; + private final String content; + private final LocalDateTime publishedAt; + private final String imageUrl; + private final String category; + private final String summary; + private final float[] summaryVector; + + public static final String FIELD_ID = "newsId"; + public static final String FIELD_TITLE = "title"; + public static final String FIELD_CONTENT = "content"; + public static final String FIELD_PUBLISHED_AT = "publishedAt"; + public static final String FIELD_IMAGE_URL = "imageUrl"; + public static final String FIELD_CATEGORY 
= "category"; + public static final String FIELD_SUMMARY = "summary"; + public static final String FIELD_SUMMARY_VECTOR = "summaryVector"; + + public static final String ANALYZER_NORI = "nori"; + public static final String FIELD_KEYWORD = "keyword"; +} + diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java new file mode 100644 index 0000000..1bcbb1b --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/infrastructure/adapter/mapper/NewsInfoDocumentMapper.java @@ -0,0 +1,39 @@ +package com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter.mapper; + +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +import com.likelion.backendplus4.talkpick.batch.index.infrastructure.adapter.document.NewsInfoDocument; + +/** + * 도메인 모델 NewsInfo를 Elasticsearch 문서 모델로 변환하는 매퍼 + * + * @since 2025-05-15 + * @modified 2025-05-19 + */ +@Component +public class NewsInfoDocumentMapper { + /** + * NewsInfo 도메인 객체를 NewsInfoDocument로 변환한다. 
+ * + * @param news 변환할 도메인 객체 + * @return 변환된 문서 객체 + * @author 정안식 + * @since 2025-05-15 + * @modified 2025-05-19 + * 25-05-19 - summary, summaryVector 필드 추가 + */ + public NewsInfoDocument toDocument(NewsInfo news) { + return new NewsInfoDocument( + news.getNewsId(), + news.getTitle(), + news.getContent(), + news.getPublishedAt(), + news.getImageUrl(), + news.getCategory(), + news.getSummary(), + news.getSummaryVector() + ); + } +} + diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/index/presentation/controller/NewsIndexController.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/presentation/controller/NewsIndexController.java new file mode 100644 index 0000000..563c25d --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/index/presentation/controller/NewsIndexController.java @@ -0,0 +1,41 @@ +package com.likelion.backendplus4.talkpick.batch.index.presentation.controller; + +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; +import com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse; +import com.likelion.backendplus4.talkpick.batch.index.application.port.in.NewsIndexUseCase; + +import lombok.RequiredArgsConstructor; + +/** + * 뉴스 데이터 일괄 색인을 위한 REST 컨트롤러 + * + * @since 2025-05-15 + */ +@RestController +@RequestMapping("/news") +@RequiredArgsConstructor +public class NewsIndexController { + + private final NewsIndexUseCase indexUseCase; + + /** + * 전체 뉴스 정보를 색인하고 처리 건수를 반환한다. 
+ * + * @return ApiResponse에 래핑된 색인된 뉴스 건수 + * @author 정안식 + * @since 2025-05-15 + */ + @EntryExitLog + @TimeTracker + @PostMapping("/index") + public ResponseEntity> indexAllNews() { + int count = indexUseCase.indexAllNewsInfo(); + return ApiResponse.success(count); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/in/ArticleCollectorUseCase.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/in/ArticleCollectorUseCase.java new file mode 100644 index 0000000..9e0643b --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/in/ArticleCollectorUseCase.java @@ -0,0 +1,26 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.application.port.in; + +import com.likelion.backendplus4.talkpick.batch.news.article.application.service.dto.ArticleCollectorStatusResponse; + +/** + * 뉴스 RSS 수집 실행/정지를 위한 유스케이스 인터페이스. + * RSS 수집기를 제어하고 현재 상태 정보를 반환한다. + * + * @since 2025-05-10 + */ +public interface ArticleCollectorUseCase { + + /** + * 수집 스케줄을 시작한다. + * + * @return 수집기의 상태 정보를 담은 응답 객체 + */ + ArticleCollectorStatusResponse start(); + + /** + * 뉴스 스케줄을 중단한다. + * + * @return 수집기의 상태 정보를 담은 응답 객체 + */ + ArticleCollectorStatusResponse stop(); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/out/CollectorPort.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/out/CollectorPort.java new file mode 100644 index 0000000..d0363e2 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/port/out/CollectorPort.java @@ -0,0 +1,33 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.application.port.out; + +/** + * 뉴스 수집 스케줄 제어를 위한 외부 시스템 연동 포트 인터페이스.

+ * Quartz Scheduler 등의 외부 실행 환경을 시작/정지하거나 상태를 확인할 수 있도록 정의한다.

+ * + * 이 포트는 Adapter를 통해 실제 구현되며,

+ * 유스케이스 계층에서는 이 인터페이스만 의존한다. + * + * @since 2025-05-10 + */ +public interface CollectorPort { + /** + * 스케줄 실행을 요청한다. + * + * @return 실행 요청이 성공하고 실제로 실행 중이면 true + */ + boolean start(); + + /** + * 스케줄 정지를 요청한다. + * + * @return 정지 요청이 성공하면 true + */ + boolean stop(); + + /** + * 현재 실행 중인지 확인한다. + * + * @return 실행 중이면 true, 아니면 false + */ + boolean isRunning(); +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/ArticleCollectorService.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/ArticleCollectorService.java new file mode 100644 index 0000000..b700025 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/ArticleCollectorService.java @@ -0,0 +1,65 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.application.service; + +import org.springframework.stereotype.Service; + +import com.likelion.backendplus4.talkpick.batch.news.article.application.port.in.ArticleCollectorUseCase; +import com.likelion.backendplus4.talkpick.batch.news.article.application.port.out.CollectorPort; +import com.likelion.backendplus4.talkpick.batch.news.article.application.service.dto.ArticleCollectorStatusResponse; + +import lombok.RequiredArgsConstructor; + +/** + * 뉴스 기사 수집 스케줄러를 제어하는 유스케이스 구현체. + * 수집기 실행 및 중단 요청을 처리하고, 그 결과를 상태 응답으로 반환한다. + * + * 내부적으로 {@link CollectorPort}를 호출하여 Quartz Scheduler 상태를 제어하며, + * 실행 결과에 따라 성공/실패 메시지를 포함한 {@link ArticleCollectorStatusResponse}를 생성한다. + * + * @author 함예정 + * @since 2025-05-11 + */ +@Service +@RequiredArgsConstructor +public class ArticleCollectorService implements ArticleCollectorUseCase { + private final CollectorPort collectorPort; + + /** + * 수집기를 실행한다. + * 실행 성공 여부에 따라 상태 응답을 생성한다. 
+ * + * @return 실행 결과에 대한 상태 응답 + * @author 함예정 + * @since 2025-05-11 + */ + @Override + public ArticleCollectorStatusResponse start() { + boolean result = collectorPort.start(); + return getCollectorStatusResponse(result); + } + + /** + * 수집기를 정지한다. + * 정지 성공 여부에 따라 상태 응답을 생성한다. + * + * @return 정지 결과에 대한 상태 응답 + * @author 함예정 + * @since 2025-05-11 + */ + @Override + public ArticleCollectorStatusResponse stop() { + boolean result = collectorPort.stop(); + return getCollectorStatusResponse(result); + } + + /** + * 실행 결과에 따라 응답 메시지를 구성한다. + * + * @param result CollectorPort 실행 결과 + * @return 상태 응답 객체 + * @author 함예정 + * @since 2025-05-11 + */ + private ArticleCollectorStatusResponse getCollectorStatusResponse(boolean result) { + return new ArticleCollectorStatusResponse(result); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/dto/ArticleCollectorStatusResponse.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/dto/ArticleCollectorStatusResponse.java new file mode 100644 index 0000000..7ff4aec --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/application/service/dto/ArticleCollectorStatusResponse.java @@ -0,0 +1,26 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.application.service.dto; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +/** + * 실행 상태를 클라이언트에 전달하기 위한 응답 DTO. + * + * @author 함예정 + * @since 2025-05-11 + */ +@Getter +public class ArticleCollectorStatusResponse { + private final boolean running; + private final String message; + + private final String failMessage = "처리에 실패했습니다"; + private final String successMessage = "요청이 성공적으로 전달 됐습니다"; + + + public ArticleCollectorStatusResponse(boolean running) { + this.running = running; + this.message = running ? 
successMessage : failMessage; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/ArticleCollectorException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/ArticleCollectorException.java new file mode 100644 index 0000000..95a228f --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/ArticleCollectorException.java @@ -0,0 +1,35 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.exception; + +import com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +/** + * 뉴스 기사 수집 관련 예외처리 클래스 + * + * @since 2025-05-10 + * @modified 2025-05-11 + * - 클래스에서 저자 삭제 (메소드에 저자 추가) + * - 코드 컨벤션에 맞도록 CustomException 상속으로 변경 (변경 전: RuntimeException 상속) + */ +public class ArticleCollectorException extends CustomException { + private final ErrorCode errorCode; + + public ArticleCollectorException(ErrorCode errorCode) { + super(errorCode); + this.errorCode = errorCode; + } + + public ArticleCollectorException(ErrorCode errorCode, Throwable cause) { + super(errorCode); + this.errorCode = errorCode; + } + + /** + * + * @return + */ + @Override + public ErrorCode getErrorCode() { + return errorCode; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/BatchJobExceptionTranslator.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/BatchJobExceptionTranslator.java new file mode 100644 index 0000000..0dd339d --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/BatchJobExceptionTranslator.java @@ -0,0 +1,37 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.exception; + +import java.util.Map; + +import org.springframework.batch.core.JobParametersInvalidException; +import 
org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; +import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; +import org.springframework.batch.core.repository.JobRestartException; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; + +/** + * Spring Batch 작업 실행 중 발생할 수 있는 예외를

+ * {@link ArticleCollectorErrorCode}로 변환하는 컴포넌트입니다. + */ +@Component +public class BatchJobExceptionTranslator { + private static final Map, ArticleCollectorErrorCode> CODE_MAP = + Map.of( + JobExecutionAlreadyRunningException.class, ArticleCollectorErrorCode.JOB_ALREADY_RUNNING, + JobRestartException.class, ArticleCollectorErrorCode.JOB_RESTART_FAIL, + JobInstanceAlreadyCompleteException.class, ArticleCollectorErrorCode.JOB_ALREADY_COMPLETE, + JobParametersInvalidException.class, ArticleCollectorErrorCode.INVALID_JOB_PARAMETER + ); + + /** + * 주어진 예외를 해당하는 {@link ArticleCollectorErrorCode}로 변환합니다. + * 정의되지 않은 예외 클래스의 경우 {@code UNKNOWN_ERROR}를 반환합니다. + * + * @param e 변환할 예외 객체 + * @return 매핑된 {@link ArticleCollectorErrorCode} + */ + public ArticleCollectorErrorCode translate(Exception e) { + return CODE_MAP.getOrDefault(e.getClass(), ArticleCollectorErrorCode.UNKNOWN_ERROR); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java new file mode 100644 index 0000000..30f0f58 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/exception/error/ArticleCollectorErrorCode.java @@ -0,0 +1,75 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.exception.error; + +import org.springframework.http.HttpStatus; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +/** + * 뉴스 기사 수집 관련 오류 코드를 정의하는 열거형 + * + * @since 2025-05-10 + * @modified 2025-05-11 + * - 클래스 주석에서 저자 삭제 + * - 예외 전역 처리를 위해 상속 구조로 변경 + * - 예외 전역 처리로 알 수 없는 오류 코드 삭제 + */ +@Getter +@RequiredArgsConstructor +public enum ArticleCollectorErrorCode implements ErrorCode { + + // 실행 오류 + JOB_ALREADY_RUNNING(HttpStatus.BAD_REQUEST, 150001, "[Quartz] Batch 실행 실패 - 현재 Job이 이미 실행 
중입니다."), + JOB_ALREADY_COMPLETE(HttpStatus.BAD_REQUEST, 150002, "[Quartz] Batch 실행 실패 - 동일한 JobParameters로 실행된 Job이 이미 완료되었습니다."), + UNKNOWN_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 550001, "[Quartz] Batch 실행 중 알 수 없는 예외 발생"), + JOB_RESTART_FAIL(HttpStatus.INTERNAL_SERVER_ERROR, 550002, "[Quartz] Batch 실행 실패 - Job을 재시작할 수 없습니다. 이전 실행 상태가 불안정하거나 종료되지 않았을 수 있습니다."), + INVALID_JOB_PARAMETER(HttpStatus.INTERNAL_SERVER_ERROR, 550003, "[Quartz] Batch 실행 실패 - JobParameters가 유효하지 않습니다. 필수 파라미터 누락 또는 형식 오류일 수 있습니다."), + SCHEDULER_START_FAIL(HttpStatus.INTERNAL_SERVER_ERROR, 550004, "[Quartz] 스케줄러 시작 실패"), + SCHEDULER_STOP_FAIL(HttpStatus.INTERNAL_SERVER_ERROR, 550005, "[Quartz] 스케줄러 중지 실패"), + STATUS_CHECK_FAIL(HttpStatus.INTERNAL_SERVER_ERROR, 550006, "상태 확인 실패"), + + // RSS 정보 로드 관련 오류 + FEED_CONNECTION_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450001,"RSS 피드 연결 중 오류가 발생했습니다."), + FEED_PARSING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450002,"RSS 피드 파싱 중 오류가 발생했습니다."), + FEED_TIMEOUT_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450003, "RSS 피드 로드 중 시간 초과가 발생했습니다."), + + // Mapper 관련 오류 + RSS_CONTENT_EMPTY(HttpStatus.INTERNAL_SERVER_ERROR, 450012, "RSS 피드 내용이 비어있습니다."), + RSS_PARSING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450013, "RSS 피드 내용 파싱 중 오류가 발생했습니다."), + RSS_IMAGE_MISSING(HttpStatus.INTERNAL_SERVER_ERROR, 450014, "RSS 피드에서 이미지를 찾을 수 없습니다."), + ARTICLE_ID_EXTRACTION_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450015, "기사 ID 추출 중 오류가 발생했습니다."), + MAPPER_NOT_FOUND(HttpStatus.INTERNAL_SERVER_ERROR, 450004, "요청한 매퍼를 찾을 수 없습니다."), + ITEM_MAPPING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450005,"RSS 항목 매핑 중 오류가 발생했습니다."), + + // 스크래퍼 관련 오류 + SCRAPER_NOT_FOUND(HttpStatus.INTERNAL_SERVER_ERROR, 450007, "요청한 스크래퍼를 찾을 수 없습니다."), + SCRAPER_CONNECTION_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450008, "기사 웹페이지 연결 중 오류가 발생했습니다."), + SCRAPER_PARSING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450009, "기사 내용 파싱 중 오류가 발생했습니다."), + EMPTY_ARTICLE_CONTENT(HttpStatus.INTERNAL_SERVER_ERROR, 450010, "스크래핑된 기사 
내용이 없습니다."), + EMPTY_ARTICLE_IMAGE(HttpStatus.INTERNAL_SERVER_ERROR, 450011, "스크래핑된 기사 이미지가 없습니다."), + + // 데이터베이스 관련 오류 + DB_SAVE_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 530001, "RSS 뉴스를 저장하는 중 오류가 발생했습니다."), + DUPLICATE_LINK_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 450006, "이미 존재하는 링크입니다."); + + private final HttpStatus status; + private final int code; + private final String message; + + @Override + public HttpStatus httpStatus() { + return status; + } + + @Override + public int codeNumber() { + return code; + } + + @Override + public String message() { + return message; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/adapter/CollectorAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/adapter/CollectorAdapter.java new file mode 100644 index 0000000..e894fb8 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/adapter/CollectorAdapter.java @@ -0,0 +1,103 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.adapter; + +import org.quartz.Scheduler; +import org.quartz.SchedulerException; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.application.port.out.CollectorPort; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; + +import lombok.RequiredArgsConstructor; + +/** + * Quartz Scheduler 를 제어하는 CollectorPort 구현체. + * 외부 요청에 따라 스케줄러를 시작하거나 정지하며, + * 현재 실행 중인지 상태를 확인할 수 있다. + * + * @since 2025-05-10 + */ +@Component +@RequiredArgsConstructor +public class CollectorAdapter implements CollectorPort { + private final Scheduler scheduler; + + /** + * Quartz 스케줄러를 시작하고, 정상적으로 시작되었는지 상태를 확인한다. 
+ * + * @return 스케줄러가 실행 중이면 true, 그렇지 않으면 false + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public boolean start() { + return startScheduler(); + } + + /** + * Quartz 스케줄러를 standby 상태로 전환하여 정지한다.

+ * 이미 standby 상태인 경우에는 아무 작업도 하지 않는다. + * + * @return 정지 요청이 성공했으면 true + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public boolean stop() { + try { + if (!scheduler.isInStandbyMode()) { + scheduler.standby(); + } + return true; + } catch (SchedulerException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCHEDULER_STOP_FAIL, e); + } + } + + /** + * 스케줄러가 실행 상태 플래그 확인 메소드 + * + * @return 스케줄러가 실행 중이면 true, 그렇지 않으면 false + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public boolean isRunning() { + return checkSchedulerStatus(); + } + + /** + * Quartz 스케줄러를 세부 시작 메소드 + * 1. 실행 요청 + * 2. 실행 상태 플래그 반환 + * + * @return 스케줄러가 실행 중이면 true, 그렇지 않으면 false + * @throws ArticleCollectorException SchedulerException 발생으로 실행 실패 시 + * @author 함예정 + * @since 2025-05-11 + */ + private boolean startScheduler() { + try { + scheduler.start(); + return isRunning(); + } catch (SchedulerException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCHEDULER_START_FAIL, e); + } + } + + /** + * 스케줄러의 현재 상태를 확인합니다. 
+ * + * @return 스케줄러가 시작되었고 대기 모드가 아닌 경우 true, 그렇지 않으면 false + * @throws ArticleCollectorException 스케줄러 상태 확인 중 예외 발생 시 커스텀 예외로 래핑하여 던짐 + * @author 함예정 + * @since 2025-05-11 + */ + private boolean checkSchedulerStatus() { + try { + return scheduler.isStarted() && !scheduler.isInStandbyMode(); + } catch (SchedulerException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.STATUS_CHECK_FAIL, e); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/JobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/JobConfig.java new file mode 100644 index 0000000..3d45269 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/JobConfig.java @@ -0,0 +1,45 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch; + +import org.springframework.batch.core.Job; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.job.builder.JobBuilder; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +/** + * Spring Batch Job 설정 클래스.

+ * RSS 기사 수집을 위한 Batch Job 을 정의하며, 파티셔닝된 Step 을 시작 단계로 구성한다.

+ * + * 이 잡은 {@code articleCollectorBatchJob}이라는 이름으로 정의되며, + * {@link Step} 객체는 외부에서 주입받아 사용한다.

+ * + * 해당 Job 은 Quartz 또는 Spring Scheduler 를 통해 주기적으로 실행될 수 있다.

+ * + * @since 2025-05-10 + */ +@Configuration +public class JobConfig { + private final String jobName = "articleCollectorBatchJob"; + private final JobRepository jobRepository; + private final Step rssPartitionedStep; + + public JobConfig(JobRepository jobRepository, Step articleRssPartitionedStep) { + this.jobRepository = jobRepository; + this.rssPartitionedStep = articleRssPartitionedStep; + } + + /** + * RSS 기사 수집용 Spring Batch Job Bean을 생성한다. + * 파티셔닝 Step 을 실행하도록 구성한다. + * + * @return RSS 기사 수집 배치 Job + * @author 함예정 + * @since 2025-05-10 + */ + @Bean + public Job articleCollectJob() { + return new JobBuilder(jobName, jobRepository) + .start(rssPartitionedStep) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/NewsCategory.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/NewsCategory.java new file mode 100644 index 0000000..6e33563 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/NewsCategory.java @@ -0,0 +1,27 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch; + +/** + * 뉴스 기사 표준 카테고리 + * + * @author 양병학 + * @since 2025-05-12 + */ +public enum NewsCategory { + POLITICS("정치"), + ECONOMY("경제"), + SOCIETY("사회"), + INTERNATIONAL("국제"), + ENTERTAINMENT("연예"), + SPORTS("스포츠"), + TOTAL("전체"); + + private final String displayName; + + NewsCategory(String displayName) { + this.displayName = displayName; + } + + public String getDisplayName() { + return displayName; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java new file mode 100644 index 
0000000..8e5c3cc --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/RssSource.java @@ -0,0 +1,177 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch; + +import lombok.Getter; + +import java.util.*; +import java.util.stream.Collectors; + +/** + * RSS 뉴스 소스와 URL을 정의하는 열거형 + * 각 항목은 언론사, 카테고리, URL 정보를 포함 + * + * @author 양병학 + * @modified 2025-05-12 표준 카테고리(NewsCategory) 도입 및 동아일보, 경향신문 카테고리별 피드 추가 + * @since 2025-05-10 + */ +@Getter +public enum RssSource { + // 국민일보 RSS 피드 + KMIB_POLITICS("국민일보", NewsCategory.POLITICS, "https://www.kmib.co.kr/rss/data/kmibPolRss.xml", "km", true, true), + KMIB_ECONOMY("국민일보", NewsCategory.ECONOMY, "https://www.kmib.co.kr/rss/data/kmibEcoRss.xml", "km", true, true), + KMIB_SOCIETY("국민일보", NewsCategory.SOCIETY, "https://www.kmib.co.kr/rss/data/kmibSocRss.xml", "km", true, true), + KMIB_INTERNATIONAL("국민일보", NewsCategory.INTERNATIONAL, "https://www.kmib.co.kr/rss/data/kmibIntRss.xml", "km", true, true), + KMIB_ENTERTAINMENT("국민일보", NewsCategory.ENTERTAINMENT, "https://www.kmib.co.kr/rss/data/kmibEntRss.xml", "km", true, true), + KMIB_SPORTS("국민일보", NewsCategory.SPORTS, "https://www.kmib.co.kr/rss/data/kmibSpoRss.xml", "km", true, true), + + // 동아일보 RSS 피드 + DONGA_POLITICS("동아일보", NewsCategory.POLITICS, "https://rss.donga.com/politics.xml", "da", true, false), + DONGA_ECONOMY("동아일보", NewsCategory.ECONOMY, "https://rss.donga.com/economy.xml", "da", true, false), + DONGA_SOCIETY("동아일보", NewsCategory.SOCIETY, "https://rss.donga.com/national.xml", "da", true, false), + DONGA_INTERNATIONAL("동아일보", NewsCategory.INTERNATIONAL, "https://rss.donga.com/international.xml", "da", true, false), + DONGA_ENTERTAINMENT("동아일보", NewsCategory.ENTERTAINMENT, "https://rss.donga.com/entertainment.xml", "da", true, false), + DONGA_SPORTS("동아일보", NewsCategory.SPORTS, "https://rss.donga.com/sports.xml", "da", true, false), + + // 경향신문 
RSS 피드 + KHAN_POLITICS("경향신문", NewsCategory.POLITICS, "https://www.khan.co.kr/rss/rssdata/politic_news.xml", "kh", true, false), + KHAN_ECONOMY("경향신문", NewsCategory.ECONOMY, "https://www.khan.co.kr/rss/rssdata/economy_news.xml", "kh", true, false), + KHAN_SOCIETY("경향신문", NewsCategory.SOCIETY, "https://www.khan.co.kr/rss/rssdata/society_news.xml", "kh", true, false), + KHAN_INTERNATIONAL("경향신문", NewsCategory.INTERNATIONAL, "https://www.khan.co.kr/rss/rssdata/world_news.xml", "kh", true, false), + KHAN_ENTERTAINMENT("경향신문", NewsCategory.ENTERTAINMENT, "https://www.khan.co.kr/rss/rssdata/art_news.xml", "kh", true, false), + KHAN_SPORTS("경향신문", NewsCategory.SPORTS, "https://www.khan.co.kr/rss/rssdata/sports_news.xml", "kh", true, false); + + /* + // MBN RSS 피드 + MBN_POLITICS("MBN", NewsCategory.POLITICS, "https://www.mbn.co.kr/rss/politics/", "mb", true,false), + MBN_ECONOMY("MBN", NewsCategory.ECONOMY, "https://www.mbn.co.kr/rss/economy/", "mb", true,false), + MBN_SOCIETY("MBN", NewsCategory.SOCIETY, "https://www.mbn.co.kr/rss/society/", "mb", true,false), + MBN_INTERNATIONAL("MBN", NewsCategory.INTERNATIONAL, "https://www.mbn.co.kr/rss/international/", "mb", true,false), + MBN_ENTERTAINMENT("MBN", NewsCategory.ENTERTAINMENT, "https://www.mbn.co.kr/rss/enter/", "mb", true,false), + MBN_SPORTS("MBN", NewsCategory.SPORTS, "https://www.mbn.co.kr/rss/sports/", "mb", true,false), + + // 조선일보 RSS 피드 + CHOSUN_POLITICS("조선일보", NewsCategory.POLITICS, "https://www.chosun.com/arc/outboundfeeds/rss/category/politics/?outputType=xml", "cs", true,false), + CHOSUN_ECONOMY("조선일보", NewsCategory.ECONOMY, "https://www.chosun.com/arc/outboundfeeds/rss/category/economy/?outputType=xml", "cs", true,false), + CHOSUN_SOCIETY("조선일보", NewsCategory.SOCIETY, "https://www.chosun.com/arc/outboundfeeds/rss/category/national/?outputType=xml", "cs", true,false), + CHOSUN_INTERNATIONAL("조선일보", NewsCategory.INTERNATIONAL, 
"https://www.chosun.com/arc/outboundfeeds/rss/category/international/?outputType=xml", "cs", true,false), + CHOSUN_ENTERTAINMENT("조선일보", NewsCategory.ENTERTAINMENT, "https://www.chosun.com/arc/outboundfeeds/rss/category/entertainments/?outputType=xml", "cs", true,false), + CHOSUN_SPORTS("조선일보", NewsCategory.SPORTS, "https://www.chosun.com/arc/outboundfeeds/rss/category/sports/?outputType=xml", "cs", true,false), + + // 한겨레 RSS 피드 + HANI_POLITICS("한겨레", NewsCategory.POLITICS, "https://www.hani.co.kr/rss/politics/", "hn", true,false), + HANI_ECONOMY("한겨레", NewsCategory.ECONOMY, "https://www.hani.co.kr/rss/economy/", "hn", true,false), + HANI_SOCIETY("한겨레", NewsCategory.SOCIETY, "https://www.hani.co.kr/rss/society/", "hn", true,false), + HANI_INTERNATIONAL("한겨레", NewsCategory.INTERNATIONAL, "https://www.hani.co.kr/rss/international/", "hn", true,false), + HANI_ENTERTAINMENT("한겨레", NewsCategory.ENTERTAINMENT, "https://www.hani.co.kr/rss/culture/", "hn", true,false), + HANI_SPORTS("한겨레", NewsCategory.SPORTS, "https://www.hani.co.kr/rss/sports/", "hn", true,false), + + // 한국경제 RSS 피드 + HANKYUNG_POLITICS("한국경제", NewsCategory.POLITICS, "https://www.hankyung.com/feed/politics", "hk", true,false), + HANKYUNG_ECONOMY("한국경제", NewsCategory.ECONOMY, "https://www.hankyung.com/feed/economy", "hk", true,false), + HANKYUNG_SOCIETY("한국경제", NewsCategory.SOCIETY, "https://www.hankyung.com/feed/society", "hk", true,false), + HANKYUNG_INTERNATIONAL("한국경제", NewsCategory.INTERNATIONAL, "https://www.hankyung.com/feed/international", "hk", true,false), + HANKYUNG_ENTERTAINMENT("한국경제", NewsCategory.ENTERTAINMENT, "https://www.hankyung.com/feed/entertainment", "hk", true,false), + HANKYUNG_SPORTS("한국경제", NewsCategory.SPORTS, "https://www.hankyung.com/feed/sports", "hk", true,false); + */ + + private final String publisherName; + private final NewsCategory category; + private final String url; + private final String mapperType; + private final boolean enabled; + private final boolean 
hasFullContent; + + RssSource(String publisherName, NewsCategory category, String url, String mapperType, boolean enabled, boolean hasFullContent) { + this.publisherName = publisherName; + this.category = category; + this.url = url; + this.mapperType = mapperType; + this.enabled = enabled; + this.hasFullContent = hasFullContent; + } + + /** + * Returns the category name. + * + * @return the display name of the category + */ + public String getCategoryName() { + return category.getDisplayName(); + } + + /** + * Returns a display name that joins the publisher name and the category name with "-". + */ + public String getDisplayName() { + return publisherName + "-" + getCategoryName(); + } + + /** + * Returns the mapper type key. + */ + public String getMapperType() { + return mapperType; + } + + /** + * Returns the publisher code prefix: the mapper type upper-cased with Locale.ROOT so the result is locale-independent. + */ + public String getCodePrefix() { + return mapperType.toUpperCase(Locale.ROOT); + } + + /** + * Returns every source whose enabled flag is set, in declaration order. + */ + public static List<RssSource> getEnabledSources() { + return Arrays.stream(values()) + .filter(RssSource::isEnabled) + .collect(Collectors.toList()); + } + + /** + * Returns whether the RSS feed carries the full article content. + * + * @return true when the full content is included in the feed + */ + public boolean hasFullContent() { + return hasFullContent; + } + + /** + * Picks one enabled RSS source per mapper type (publisher); the first declared source of each type wins. + * + * @return de-duplicated RSS sources (one per publisher), ordered by first appearance in the enum + */ + public static List<RssSource> getUniqueMapperSources() { + Map<String, RssSource> uniqueSources = new LinkedHashMap<>(); + + for (RssSource source : getEnabledSources()) { + String mapperType = source.getMapperType(); + uniqueSources.putIfAbsent(mapperType, source); + } + + return new ArrayList<>(uniqueSources.values()); + } + + /** + * Returns all sources of the given publisher (the enabled flag is not checked here). + */ + public static List<RssSource> getSourcesByPublisher(String publisherName) { + return Arrays.stream(values()) + .filter(source -> source.getPublisherName().equals(publisherName)) + .collect(Collectors.toList()); + } + + /** + * Returns all enabled sources of the given category. + * + * @param category the category to look up + * @return the enabled sources that belong to that category + */ + public static List<RssSource> getSourcesByCategory(NewsCategory category) { + return Arrays.stream(values()) + .filter(RssSource::isEnabled) + .filter(source -> 
source.getCategory() == category) + .collect(Collectors.toList()); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/StepConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/StepConfig.java new file mode 100644 index 0000000..03143a1 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/batch/StepConfig.java @@ -0,0 +1,104 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch; + +import java.util.List; + +import org.springframework.batch.core.Step; +import org.springframework.batch.core.partition.support.Partitioner; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.batch.core.step.builder.StepBuilder; +import org.springframework.batch.item.ItemProcessor; +import org.springframework.batch.item.ItemReader; +import org.springframework.batch.item.ItemWriter; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.task.TaskExecutor; +import org.springframework.transaction.PlatformTransactionManager; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +/** + * Spring Batch Step 구성 클래스. + * RSS 기사 수집을 위한 병렬 처리용 파티셔닝 Step과 + * 실제 처리 로직이 포함된 청크 기반 Step을 설정한다. 
+ *
+ * - articleRssPartitionedStep: partitions the source data and processes the partitions in parallel
+ * - parseRssStep: within each partition, reads RSS data, transforms it, and stores it
+ *
+ * @since 2025-05-10
+ */
+@Configuration
+public class StepConfig {
+    private final String executorName = "normalExecutor";
+    private static final String partitionedStepName = "articleRssPartitionedStep";
+    private final String parseRssStepName = "parseRssStep";
+    private final int gridSize = 3;
+    private final int chunkSize = 10;
+    private final int retryLimit = 3;
+    private final int skipLimit = 100;
+    private final JobRepository jobRepository;
+    private final Partitioner rssSourcePartitioner;
+    private final PlatformTransactionManager transactionManager;
+    private final TaskExecutor taskExecutor;
+    private final ItemProcessor> processor;
+    private final ItemWriter> writer;
+
+    public StepConfig(JobRepository jobRepository,
+        Partitioner rssSourcePartitioner,
+        PlatformTransactionManager platformTransactionManager,
+        @Qualifier(executorName)
+        TaskExecutor taskExecutor,
+        ItemProcessor> processor,
+        ItemWriter> writer) {
+        this.jobRepository = jobRepository;
+        this.rssSourcePartitioner = rssSourcePartitioner;
+        this.transactionManager = platformTransactionManager;
+        this.taskExecutor = taskExecutor;
+        this.processor = processor;
+        this.writer = writer;
+    }
+
+    /**
+     * Defines the Step that partitions the RSS source data and processes it in parallel.
+     * It runs {@code parseRssStep} once per partition, handing the partitions to the
+     * injected TaskExecutor with a grid size of {@code gridSize}.
+     *
+     * @param parseRssStep the worker Step executed for each partition
+     * @return the partitioned Step
+     * @author 함예정
+     * @since 2025-05-10
+     */
+    @Bean
+    public Step articleRssPartitionedStep(Step parseRssStep) {
+        return new StepBuilder(partitionedStepName, jobRepository)
+            .partitioner(parseRssStep.getName(), rssSourcePartitioner)
+            .step(parseRssStep)
+            .taskExecutor(taskExecutor)
+            .gridSize(gridSize)
+            .build();
+    }
+
+    /**
+     * Defines the chunk-oriented Step that reads, processes, and writes RSS data.
+     * The Step is fault tolerant: {@link ArticleCollectorException} is registered both as
+     * retryable (limit {@code retryLimit}) and as skippable (limit {@code skipLimit}).
+     *
+     * @param articleReader the Reader that supplies the RSS data sources
+     * @return the Step that processes RSS data
+     * @author 함예정
+     * @since 2025-05-10
+     */
+    @Bean
+    public Step parseRssStep(ItemReader articleReader) {
+        return new StepBuilder(parseRssStepName, jobRepository)
+            .>chunk(chunkSize, transactionManager)
+            .reader(articleReader)
+            .processor(processor)
+            .writer(writer)
+            .faultTolerant()
+            .retry(ArticleCollectorException.class)
+            .retryLimit(retryLimit)
+            .skip(ArticleCollectorException.class)
+            .skipLimit(skipLimit)
+            .build();
+    }
+}
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java
new file mode 100644
index 0000000..2421b0c
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/BatchJobExecutor.java
@@ -0,0 +1,69 @@
+package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.executor;
+
+import org.quartz.DisallowConcurrentExecution;
+import org.quartz.JobExecutionContext;
+import org.springframework.batch.core.Job;
+import org.springframework.batch.core.JobParameters;
+import org.springframework.batch.core.JobParametersBuilder;
+import org.springframework.batch.core.launch.JobLauncher;
+import org.springframework.stereotype.Component;
+
+import com.likelion.backendplus4.talkpick.batch.news.article.exception.BatchJobExceptionTranslator;
+import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode;
+import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException;
+
+import lombok.RequiredArgsConstructor;
+
+/**
+ * Executor class that launches a Spring Batch Job when triggered by Quartz.
+ * Runs {@code rssJob} manually through the JobLauncher, building fresh JobParameters on every run so that runs do not collide.
+ * + * - @DisallowConcurrentExecution: 이전 실행이 끝나기 전에는 새로운 실행이 중첩되지 않도록 제한 + * - JobParameters에 timestamp를 포함시켜 매번 다른 인스턴스로 실행되도록 설정 + * + * 이 클래스는 단순한 실행자 역할만 하며, 실제 배치 로직은 {@code rssJob} 내부에 정의되어 있다. + * + * @since 2025-05-10 + */ +@Component +@DisallowConcurrentExecution +@RequiredArgsConstructor +public class BatchJobExecutor implements org.quartz.Job { + private final JobLauncher jobLauncher; + private final Job articleCollectJob; + private final BatchJobExceptionTranslator batchJobExceptionTranslator; + + /** + * Quartz 트리거에 의해 호출되는 메서드. + * 내부적으로 Spring Batch Job을 실행하는 로직을 위임한다. + * + * @param jobExecutionContext Quartz 실행 컨텍스트 + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public void execute(JobExecutionContext jobExecutionContext) { + startSpringBatchJob(); + } + + /** + * Spring Batch Job을 JobLauncher를 통해 실행한다. + * 각 실행마다 timestamp 파라미터를 부여하여 중복 실행 방지. + * 예외 발생 시 {@link ArticleCollectorException}으로 변환하여 처리한다. + * + * @author 함예정 + * @since 2025-05-10 + */ + private void startSpringBatchJob() { + JobParameters params = new JobParametersBuilder() + .addLong("timestamp", System.currentTimeMillis()) + .toJobParameters(); + + try { + jobLauncher.run(articleCollectJob, params); + } catch (Exception e) { + ArticleCollectorErrorCode exceptionCode = batchJobExceptionTranslator.translate(e); + throw new ArticleCollectorException(exceptionCode); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/TaskExecutorConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/TaskExecutorConfig.java new file mode 100644 index 0000000..a5fc332 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/executor/TaskExecutorConfig.java @@ -0,0 +1,49 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.executor; + +import 
org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.core.task.TaskExecutor;
+import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
+
+import com.likelion.backendplus4.talkpick.batch.common.decorator.logging.MdcTaskDecorator;
+
+import lombok.Getter;
+import lombok.RequiredArgsConstructor;
+
+/**
+ * TaskExecutor configuration for asynchronous work.
+ * Defines a thread-pool based Executor for Spring Batch or other parallel processing.
+ *
+ * - {@link MdcTaskDecorator} is installed as the task decorator so the MDC logging context is carried to each thread
+ *
+ * @since 2025-05-02
+ * @modified 2025-05-10
+ */
+@Configuration
+@RequiredArgsConstructor
+public class TaskExecutorConfig {
+    private final MdcTaskDecorator mdcTaskDecorator;
+
+    @Getter
+    private static final String normalExecutorName = "normalExecutor";
+
+    /**
+     * Creates the general-purpose thread-pool TaskExecutor, registered under the bean
+     * name {@code normalExecutorName}.
+     * The pool uses 5 core threads, at most 10 threads, and a queue capacity of 10.
+     *
+     * @return the initialized TaskExecutor instance
+     * @author 함예정
+     * @since 2025-05-02
+     * @modified 2025-05-10
+     */
+    @Bean(normalExecutorName)
+    public TaskExecutor taskExecutor() {
+        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
+        executor.setCorePoolSize(5);
+        executor.setMaxPoolSize(10);
+        executor.setQueueCapacity(10);
+        executor.setTaskDecorator(mdcTaskDecorator);
+        executor.setThreadNamePrefix("normalExecutor-");
+        executor.initialize();
+        return executor;
+    }
+}
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzJobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzJobConfig.java
new file mode 100644
index 0000000..1dd273f
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzJobConfig.java
@@ -0,0 +1,46 @@
+package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.quartz;
+
+import
org.quartz.JobBuilder;
+import org.quartz.JobDetail;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.executor.BatchJobExecutor;
+
+/**
+ * Quartz JobDetail configuration for the scheduled RSS feed collection job.
+ * This class only registers the JobDetail for {@link BatchJobExecutor}.
+ *
+ * NOTE(review): the previous description stated that the schedule comes from the
+ * spring.quartz.article-collector.cron property with a one-minute default, and that the
+ * job logs the processed item count and logs errors before waiting for the next schedule.
+ * None of that is visible in this class - confirm where it is implemented.
+ *
+ * @modified 2025-05-17
+ * @since 2025-05-10 initial version
+ *
+ */
+@Configuration
+public class CollectorQuartzJobConfig {
+    private final String articleCollectorJobDetailName = "articleCollectorJobDetail";
+
+
+    /**
+     * Registers the Quartz JobDetail bean for RSS collection.
+     * The job class is {@link BatchJobExecutor}, configured with:
+     * - withIdentity(articleCollectorJobDetailName): the name that identifies this Job within the Scheduler
+     * - storeDurably(): keeps the Job registered in the Scheduler even when no Trigger refers to it
+     *
+     * @return the JobDetail for the RSS batch job
+     * @author 함예정
+     * @since 2025-05-10
+     */
+    @Bean
+    public JobDetail articleCollectorJobDetail() {
+        return JobBuilder.newJob(BatchJobExecutor.class)
+            .withIdentity(articleCollectorJobDetailName)
+            .storeDurably()
+            .build();
+    }
+
+}
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzTriggerConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzTriggerConfig.java
new file mode 100644
index 0000000..394c76e
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/config/quartz/CollectorQuartzTriggerConfig.java
@@ -0,0 +1,49 @@
+package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.quartz;
+
+import org.quartz.CronScheduleBuilder;
+import org.quartz.JobDetail;
+import org.quartz.Trigger;
+import org.quartz.TriggerBuilder;
+import
org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+@Configuration
+public class CollectorQuartzTriggerConfig {
+    private final String cronExpression;
+    private final JobDetail articleCollectorJobDetail;
+    private final String articleCollectorJobDetailName = "articleCollectorJobDetail";
+
+    /**
+     * Receives the cron expression and the JobDetail through constructor injection.
+     *
+     * @param cronExpression cron expression defining how often the RSS batch runs,
+     *                       read from the spring.quartz.article-collector.cron property
+     *                       (the placeholder declares no default value)
+     * @param articleCollectorJobDetail the JobDetail this configuration's Trigger fires
+     * @author 함예정
+     * @since 2025-05-10
+     */
+    public CollectorQuartzTriggerConfig(@Value("${spring.quartz.article-collector.cron}") String cronExpression,
+        JobDetail articleCollectorJobDetail) {
+        this.cronExpression = cronExpression;
+        this.articleCollectorJobDetail = articleCollectorJobDetail;
+    }
+
+    /**
+     * Registers the Quartz Trigger bean for RSS collection.
+     * - forJob: the Quartz Job this Trigger fires
+     * - withIdentity: the name that identifies this Trigger within the Scheduler
+     *   (the job detail name with "trigger" appended)
+     * - withSchedule: sets the firing schedule from the cron expression
+     *
+     * @return the Trigger for the RSS batch job
+     * @author 함예정
+     * @since 2025-05-10
+     */
+    @Bean
+    public Trigger rssBatchTrigger() {
+        return TriggerBuilder.newTrigger()
+            .forJob(articleCollectorJobDetail)
+            .withIdentity(articleCollectorJobDetailName + "trigger")
+            .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression))
+            .build();
+    }
+}
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java
new file mode 100644
index 0000000..7468f15
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssEntryProcessor.java
@@ -0,0 +1,103 @@
+package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.processor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.springframework.batch.core.configuration.annotation.StepScope;
+import org.springframework.batch.item.ItemProcessor;
+import org.springframework.stereotype.Component;
+
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.factory.RssMappingFactory;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity;
+import com.rometools.rome.feed.synd.SyndEntry;
+
+/**
+ * Spring Batch ItemProcessor that builds a list of articles from an RSS source.
+ * It parses the SyndEntry list from the URL of the given {@link RssSource} and converts
+ * it to a list of {@link ArticleEntity} using the mapper selected for that source.
+ *
+ * - RSS parsing: the feed is read through {@link RssFeedReader}
+ * - Data mapping: {@link RssMappingFactory} picks the mapper by the source's mapper type
+ *
+ * The bean is step-scoped ({@code @StepScope}).
+ *
+ * @since 2025-05-10
+ * @modified 2025-05-13 switched from RssMapper to AbstractRssMapper
+ */
+@Component
+@StepScope
+public class RssEntryProcessor implements ItemProcessor> {
+
+    private final RssFeedReader rssFeedReader;
+    private final RssMappingFactory mappingFactory;
+
+    public RssEntryProcessor(RssFeedReader rssFeedReader, RssMappingFactory mappingFactory) {
+        this.rssFeedReader = rssFeedReader;
+        this.mappingFactory = mappingFactory;
+    }
+
+    /**
+     * Takes a single RSS source, parses it, and converts the result to a list of articles.
+     * - parse the RSS feed
+     * - select the mapper
+     * - build the article list
+     *
+     * @param source RSS feed source information
+     * @return the article entities extracted from that source
+     * @author 함예정
+     * @since 2025-05-10
+     */
+    @Override
+    public List process(RssSource source) {
+        List rssParseResult = parseRss(source);
+        AbstractRssMapper mapper = getMapper(source);
+
+        return buildArticleEntityList(source, rssParseResult, mapper);
+    }
+
+    /**
+     * Fetches the feed for the source's URL and returns the entries produced by
+     * {@link RssFeedReader#getFeed}.
+     *
+     * @param source RSS feed source
+     * @return the RSS entries returned by the feed reader
+     * @since 2025-05-10
+     * @author 함예정
+     * @modified 2025-05-18 now also passes the mapper type
+     */
+    private List parseRss(RssSource source) {
+        return rssFeedReader.getFeed(source.getUrl(), source.getMapperType());
+    }
+
+    /**
+     * Returns the mapper registered for the source's mapper type.
+     *
+     * @param source RSS source carrying the mapper type
+     * @return the mapper instance
+     * @since 2025-05-10
+     * @author 함예정
+     * @modified 2025-05-13 changed to the AbstractRssMapper type
+     */
+    private AbstractRssMapper getMapper(RssSource source) {
+        return mappingFactory.getMapper(source.getMapperType());
+    }
+
+    /**
+     * Builds the ArticleEntity list from the parsed SyndEntry results.
+     *
+     * @param source RSS source information
+     * @param rssParseResult entries parsed from the RSS feed
+     * @param mapper the RSS mapper matching the source
+     * @return the converted ArticleEntity list
+     * @since 2025-05-10
+     * @author 함예정
+     * @modified 2025-05-13 changed to the AbstractRssMapper type
+     */
+    private List buildArticleEntityList(RssSource source, List rssParseResult,
+        AbstractRssMapper mapper) {
+        return rssParseResult.stream()
+            .map(entry -> mapper.mapToRssNews(entry, source))
+            .toList();
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java
new file mode 100644
index 0000000..1e74a3a
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/processor/RssFeedReader.java
@@ -0,0 +1,175 @@
+package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.processor;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException;
+import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository;
+import com.rometools.rome.feed.synd.SyndEntry;
+import com.rometools.rome.feed.synd.SyndFeed;
+import com.rometools.rome.io.SyndFeedInput;
+import com.rometools.rome.io.XmlReader;
+import lombok.extern.slf4j.Slf4j; +/** + * RSS 피드 URL을 통해 XML 피드를 읽고 파싱하여 {@link SyndEntry} 목록으로 반환하는 Reader 클래스. + * Rome 라이브러리를 이용하여 RSS를 파싱하며, 유효하지 않은 URL 또는 파싱 오류에 대해 예외를 처리한다. + * + * @modified 2025-05-18 최신 발행일 이후 데이터만 필터링하는 기능 추가 + * @since 2025-05-10 + */ +@Slf4j +@Component +public class RssFeedReader { + private static final Map lastProcessedDateMap = new ConcurrentHashMap<>(); + private final NewsInfoJpaRepository rssNewsRepository; + + @Autowired + public RssFeedReader(NewsInfoJpaRepository rssNewsRepository) { + this.rssNewsRepository = rssNewsRepository; + } + + /** + * 주어진 피드 URL로부터 RSS 피드를 파싱하고, 최신 발행일 이후의 {@link SyndEntry} 리스트를 반환한다. + * + * @param feedUrl RSS 피드의 URL 문자열 + * @param mapperType 매퍼 타입 (언론사 코드) + * @return 파싱 및 필터링된 SyndEntry 목록 + * @modified 2025-05-18 최신 발행일 이후 데이터만 필터링하는 기능 추가 + * @author 함예정 + * @since 2025-05-10 + */ + public List getFeed(String feedUrl, String mapperType) { + URL url = getURL(feedUrl); + URLConnection connection = openConnectionWithTimeout(url); + List entries = parseRssEntries(connection); + + LocalDateTime latestPubDate = getLatestPubDate(mapperType); + + List filteredEntries = entries.stream() + .filter(entry -> isAfterLatestPubDate(entry, latestPubDate)) + .collect(Collectors.toList()); + + return filteredEntries; + } + + /** + * 언론사별 최신 발행일 조회 (캐싱 추가) + * + * @param mapperType 매퍼 타입 (언론사 코드) + * @return 최신 발행일 또는 기본값 + */ + private LocalDateTime getLatestPubDate(String mapperType) { + LocalDateTime latestPubDate = rssNewsRepository.findLatestPubDateByGuidPrefix(mapperType); + + if (null == latestPubDate) { + latestPubDate = getDefaultPubDate(); + } + + lastProcessedDateMap.put(mapperType, latestPubDate); + return latestPubDate; + } + + private LocalDateTime getDefaultPubDate() { + LocalDateTime latestPubDate = LocalDateTime.now().minusDays(1); + return latestPubDate; + } + + /** + * 항목의 발행일이 최신 발행일보다 이후인지 확인 + * + * @param entry RSS 항목 + * @param latestPubDate 최신 발행일 + * @return 최신 발행일 이후면 true + */ + 
private boolean isAfterLatestPubDate(SyndEntry entry, LocalDateTime latestPubDate) { + if (null == entry.getPublishedDate()) { + log.debug("발행일 없음 - 항목 제외: {}", entry.getTitle()); + return false; + } + + LocalDateTime pubDate = convertToLocalDateTime(entry.getPublishedDate()); + + boolean isAfter = pubDate.isAfter(latestPubDate); + + return isAfter; + } + + /** + * Date 객체를 LocalDateTime으로 변환 + * + * @param date 변환할 Date 객체 + * @return 변환된 LocalDateTime + */ + private LocalDateTime convertToLocalDateTime(Date date) { + return date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); + } + + /** + * 문자열 형태의 URL을 {@link URL} 객체로 변환한다. + * + * @param feedUrl 문자열 형태의 URL + * @return URL 객체 + * @throws RuntimeException 유효하지 않은 URL 형식일 경우 + * @since 2025-05-10 + * @author 함예정 + */ + private URL getURL(String feedUrl) { + try { + return new URL(feedUrl); + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + } + + /** + * 지정된 URL에 대해 연결 타임아웃과 읽기 타임아웃을 설정한 후 URLConnection을 반환합니다. + * + * @param url 연결할 URL 객체 + * @return 설정된 타임아웃을 가진 URLConnection 객체 + * @throws RuntimeException 연결 중 IOException이 발생할 경우 런타임 예외로 래핑하여 던짐 + * @author 함예정 + * @since 2025-05-12 + */ + private URLConnection openConnectionWithTimeout(URL url) { + try { + URLConnection connection = url.openConnection(); + connection.setConnectTimeout(3000); + connection.setReadTimeout(5000); + return connection; + } catch (IOException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_CONNECTION_ERROR, e); + } + } + + /** + * 주어진 URLConnection으로부터 RSS 피드를 읽어 SyndEntry 목록으로 파싱합니다. 
+ * + * @param connection RSS 피드를 제공하는 URLConnection 객체 + * @return 파싱된 SyndEntry 객체 리스트 + * @throws ArticleCollectorException RSS 피드 파싱 중 오류가 발생한 경우 사용자 정의 예외로 래핑하여 던짐 + * @author 함예정 + * @since 2025-05-12 + */ + private List parseRssEntries(URLConnection connection) { + try (XmlReader reader = new XmlReader(connection)) { + SyndFeedInput input = new SyndFeedInput(); + SyndFeed syndFeed = input.build(reader); + return syndFeed.getEntries(); + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.FEED_PARSING_ERROR, e); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/ArticleReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/ArticleReader.java new file mode 100644 index 0000000..e6f1e20 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/ArticleReader.java @@ -0,0 +1,51 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.reader; + +import java.util.Iterator; +import java.util.List; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.batch.item.ItemReader; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; + +import lombok.extern.slf4j.Slf4j; + +/** + * StepExecutionContext로부터 전달받은 RSS 소스 리스트를 순차적으로 제공하는 ItemReader 구현체. + * Spring Batch의 Step 내부에서 Partition 단위로 각 소스를 하나씩 읽어 처리하는 데 사용된다. + * + * @since 2025-05-10 + */ +@Slf4j +@Component +@StepScope +public class ArticleReader implements ItemReader { + + private final Iterator iterator; + + /** + * StepExecutionContext에 저장된 RSS 소스 리스트를 기반으로 Iterator를 초기화한다. 
+     *
+     * @param sources RSS source list read from the {@code sourceList} entry of the StepExecutionContext
+     * @since 2025-05-10
+     * @author 함예정
+     */
+    public ArticleReader(@Value("#{stepExecutionContext['sourceList']}") List sources) {
+        this.iterator = sources.iterator();
+    }
+
+    /**
+     * Returns the RSS sources one at a time, and null once none are left.
+     * NOTE(review): the original comment says a null return ends the Step - that is the
+     * ItemReader contract as understood here; confirm against the Spring Batch docs.
+     *
+     * @return the next RssSource, or null
+     * @since 2025-05-10
+     * @author 함예정
+     */
+    @Override
+    public RssSource read() {
+        return iterator.hasNext() ? iterator.next() : null;
+    }
+}
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java
new file mode 100644
index 0000000..a9ec1e7
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/reader/RssSourcePartitioner.java
@@ -0,0 +1,124 @@
+package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.reader;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.springframework.batch.core.partition.support.Partitioner;
+import org.springframework.batch.item.ExecutionContext;
+import org.springframework.stereotype.Component;
+
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource;
+
+/**
+ * Partitioner that splits the enabled RSS sources into partitions and passes them on through the StepExecutionContext.
+ * Used by Spring Batch for multi-threaded / parallel execution.
+ *

+ * 각 파티션은 sourceList를 포함한 ExecutionContext로 구성된다. + * + * @since 2025-05-10 + */ +@Component +public class RssSourcePartitioner implements Partitioner { + /** + * 전체 RSS 소스를 파티셔닝하여 각 파티션별 ExecutionContext를 생성한다. + * 모든 활성화된 RSS 소스(카테고리 포함)를 처리한다. + * + * @param gridSize 실행할 파티션 수 + * @return 파티션 이름과 ExecutionContext의 매핑 정보 + * @modified 2025-05-14 모든 카테고리 처리하도록 수정 + * @author 함예정 + * @since 2025-05-10 + */ + @Override + public Map partition(int gridSize) { + List allSources = RssSource.getEnabledSources(); + + int chunkSize = calculateChunkSize(allSources.size(), gridSize); + return buildPartitions(allSources, chunkSize); + } + + /** + * 총 소스 수와 파티션 수를 기반으로 파티션당 소스 개수를 계산한다. + * + * @param totalSources 전체 RSS 소스 수 + * @param gridSize 파티션 수 + * @return 파티션당 소스 개수 + * @since 2025-05-10 + */ + private int calculateChunkSize(int totalSources, int gridSize) { + int chunkSize = (int) Math.ceil((double) totalSources / gridSize); + return chunkSize; + } + + /** + * RSS 소스를 주어진 chunkSize로 나눠 각 파티션별 ExecutionContext를 생성한다. + * + * @param sources RSS 소스 리스트 + * @param chunkSize 파티션당 소스 개수 + * @return 파티션 맵 + * @since 2025-05-10 + */ + private Map buildPartitions(List sources, int chunkSize) { + Map partitions = new HashMap<>(); + int totalPartitions = calculateTotalPartitions(sources, chunkSize); + + for (int i = 0; i < totalPartitions; i++) { + int from = i * chunkSize; + int to = calculateChunkEndIndex(sources, chunkSize, from); + + if (from >= to) { + break; + } + + ExecutionContext context = buildExecutionContext(sources, from, to); + partitions.put("partition" + i, context); + } + + return partitions; + } + + /** + * 주어진 RSS 소스 리스트를 청크 크기(chunkSize)로 분할할 때 필요한 총 파티션 수를 계산합니다. 
+ * + * @param sources RSS 소스 목록 + * @param chunkSize 하나의 파티션에 포함될 RSS 소스 수 + * @return 전체 파티션 수 + * @since 2025-05-12 + */ + private int calculateTotalPartitions(List sources, int chunkSize) { + return (sources.size() + chunkSize - 1) / chunkSize; + } + + /** + * 주어진 시작 인덱스(from)와 청크 크기(chunkSize)를 기반으로, + * 리스트의 범위를 초과하지 않도록 제한된 끝 인덱스를 계산합니다. + * + * @param sources RSS 소스 리스트 + * @param chunkSize 하나의 파티션에 포함될 RSS 소스 수 + * @param from 시작 인덱스 + * @return 리스트 범위를 초과하지 않는 끝 인덱스 + * @since 2025-05-12 + */ + private int calculateChunkEndIndex(List sources, int chunkSize, int from) { + return Math.min(from + chunkSize, sources.size()); + } + + /** + * 지정된 인덱스 범위에 해당하는 RSS 소스 부분 리스트로 ExecutionContext를 생성한다. + * 생성된 context는 Spring Batch 파티션 실행 시 각 Step에 전달된다. + * + * @param sources 전체 RSS 소스 리스트 + * @param from 시작 인덱스 (포함) + * @param to 종료 인덱스 (미포함) + * @return 파티션별 RSS 소스가 포함된 ExecutionContext + * @since 2025-05-10 + */ + private ExecutionContext buildExecutionContext(List sources, int from, int to) { + List subList = new ArrayList<>(sources.subList(from, to)); + ExecutionContext context = new ExecutionContext(); + context.put("sourceList", subList); + return context; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java new file mode 100644 index 0000000..1a9cec2 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/AbstractRssMapper.java @@ -0,0 +1,254 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import 
com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException;
+import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.result.ScrapingResult;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.HtmlParser;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.ParagraphUtil;
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity;
+import com.rometools.rome.feed.synd.SyndContent;
+import com.rometools.rome.feed.synd.SyndEntry;
+
+import java.io.IOException;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Abstract base class that converts RSS entries into ArticleEntity objects.
+ * Provides the shared conversion logic; subclasses supply the source-specific parts.
+ *
+ * @author 양병학
+ * @since 2025-05-13 initial version
+ * @modified 2025-05-15 improved dependency injection (template method pattern applied)
+ */
+
+public abstract class AbstractRssMapper {
+
+    /**
+     * Returns the ScraperFactory supplied by the concrete mapper.
+     */
+    protected abstract ScraperFactory getScraperFactory();
+
+    /**
+     * Returns the code identifying this mapper's type.
+     * A lower-case press code (e.g. "km", "da", "kh").
+     */
+    public abstract String getMapperType();
+
+    /**
+     * Template method that converts one RSS entry to an ArticleEntity.
+     * Title, link, publish date, category and GUID come from the extract* methods; the
+     * description and image URL come from the result of {@link #performSpecificMapping}.
+     *
+     * @param entry the SyndEntry (RSS data) to convert
+     * @param source RSS source information
+     * @return the converted ArticleEntity
+     */
+    public final ArticleEntity mapToRssNews(SyndEntry entry, RssSource source) {
+        String guid = extractGuid(entry, source);
+        String title = extractTitle(entry);
+        String link = extractLink(entry);
+        LocalDateTime pubDate = extractPubDate(entry);
+        String category = extractCategory(entry, source);
+        String imageUrl = extractImageUrl(entry);
+
+        String baseDescription = extractDescription(entry);
+        ScrapingResult result = performSpecificMapping(entry, source, link, baseDescription, imageUrl);
+
+        return ArticleEntity.builder()
+            .title(title)
+            .link(link)
+            .pubDate(pubDate)
+            .category(category)
+            .guid(guid)
+            .description(result.getDescription())
+            .imageUrl(result.getImageUrl())
+            .build();
+    }
+
+    /**
+     * Abstract hook for the mapper-specific part of the conversion.
+     *
+     * @param entry RSS entry
+     * @param source RSS source information
+     * @param link article link
+     * @param baseDescription description extracted from the RSS entry
+     * @param baseImageUrl image URL extracted from the RSS entry
+     * @return mapping result (description and image URL)
+     */
+    protected abstract ScrapingResult performSpecificMapping(
+        SyndEntry entry,
+        RssSource source,
+        String link,
+        String baseDescription,
+        String baseImageUrl);
+
+    /**
+     * Converts a Date to a LocalDateTime in the system default time zone.
+     *
+     * @param date the Date to convert
+     * @return the converted LocalDateTime, or the current time when date is null
+     */
+    protected LocalDateTime convertToLocalDateTime(Date date) {
+        return (null != date)
+            ? date.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime()
+            : LocalDateTime.now();
+    }
+
+    /**
+     * Extracts the title.
+     */
+    protected String extractTitle(SyndEntry entry) {
+        return entry.getTitle();
+    }
+
+    /**
+     * Extracts the link.
+     *
+     * @param entry RSS entry
+     * @return the link
+     */
+    protected String extractLink(SyndEntry entry) {
+        return entry.getLink();
+    }
+
+    /**
+     * Extracts the publish date.
+     *
+     * @param entry RSS entry
+     * @return the publish date (current time when the entry has none)
+     */
+    protected LocalDateTime extractPubDate(SyndEntry entry) {
+        return convertToLocalDateTime(entry.getPublishedDate());
+    }
+
+    /**
+     * Extracts the description.
+     *
+     * @param entry RSS entry
+     * @return the description value, or null when the entry has no description
+     */
+    protected String extractDescription(SyndEntry entry) {
+        return null != entry.getDescription() ? entry.getDescription().getValue() : null;
+    }
+
+    /**
+     * Extracts the image URL from the first media:content element in the entry's
+     * foreign markup.
+     *
+     * @param entry RSS entry
+     * @return the value of its "url" attribute, or an empty string when none is found
+     */
+    protected String extractImageUrl(SyndEntry entry) {
+        return entry.getForeignMarkup().stream()
+            .filter(element -> "content".equals(element.getName()) &&
+                "media".equals(element.getNamespacePrefix()))
+            .findFirst()
+            .map(element -> element.getAttributeValue("url"))
+            .orElse("");
+    }
+
+    /**
+     * Extracts the category; this default implementation uses the source's category name.
+     *
+     * @param entry RSS entry
+     * @param source RSS source information
+     * @return the category
+     */
+    protected String extractCategory(SyndEntry entry, RssSource source) {
+        return source.getCategoryName();
+    }
+
+    /**
+     * Template method for GUID extraction.
+     * Validates the entry link, asks the subclass for the unique ID via
+     * {@link #extractUniqueIdFromLink(String)}, validates it, and prefixes it with the
+     * source's code prefix. The method is final.
+     *
+     * @param entry RSS entry
+     * @param source RSS source information
+     * @return GUID in the form press code prefix + unique ID
+     * @throws ArticleCollectorException if the link is missing or the ID cannot be extracted
+     */
+    protected final String extractGuid(SyndEntry entry, RssSource source) {
+        validateEntryLink(entry.getLink());
+
+        String uniqueId = extractUniqueIdFromLink(entry.getLink());
+        validateUniqueId(uniqueId);
+
+        return source.getCodePrefix() + uniqueId;
+    }
+
+    /**
+     * Abstract method that extracts the unique ID from an article link.
+     * Each mapper implements it for its own URL pattern.
+     *
+     * @param link article link
+     * @return the extracted unique ID
+     * @throws ArticleCollectorException if the link is null or no ID can be extracted
+     */
+    protected abstract String extractUniqueIdFromLink(String link);
+
+    /**
+     * Builds an ArticleEntity from already-extracted field values.
+     * NOTE(review): not called anywhere in this class; mapToRssNews builds the entity inline.
+     */
+    private ArticleEntity buildArticleEntity(String title, String link, LocalDateTime pubDate,
+        String guid, String description, String category, String imageUrl) {
+        return ArticleEntity.builder()
+            .title(title)
+            .link(link)
+            .pubDate(pubDate)
+            .category(category)
+            .guid(guid)
+            .description(description)
+            .imageUrl(imageUrl)
+            .build();
+    }
+
+    /**
+     * Verifies that the link is neither null nor blank.
+     *
+     * @param link the link to validate
+     * @throws ArticleCollectorException if the link is not valid
+     */
+    private void validateEntryLink(String link) {
+        if (isNullOrEmpty(link)) {
+            throw new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR);
+        }
+    }
+
+    /**
+     * Verifies that the unique ID is neither null nor blank.
+     *
+     * @param uniqueId the unique ID to validate
+     * @throws ArticleCollectorException if the unique ID is not valid
+     */
+    private void validateUniqueId(String uniqueId) {
+        if (isNullOrEmpty(uniqueId)) {
+            throw new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR);
+        }
+    }
+
+    /**
+     * Checks whether a string is null or empty after trimming.
+     *
+     * @param str the string to check
+     * @return true if null or blank, false otherwise
+     */
+    private boolean isNullOrEmpty(String str) {
+        return null == str || str.trim().isEmpty();
+    }
+
+    /**
+     * Checks whether a list is null or empty.
+     * NOTE(review): not called anywhere in this class.
+     *
+     * @param list the list to check
+     * @return true if null or empty, false otherwise
+     */
+    private boolean isNullOrEmptyList(List list) {
+        return null == list || list.isEmpty();
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/factory/RssMappingFactory.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/factory/RssMappingFactory.java
new file mode 100644
index 0000000..54f2e82
--- /dev/null
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/factory/RssMappingFactory.java
@@ -0,0 +1,50 @@
+package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.factory;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper;
+
+/**
+ * Factory class that manages the RSS mapping strategies.
+ *
+ * @since 2025-05-10
+ * @modified 2025-05-13 uses AbstractRssMapper instead of the RssMapper interface
+ */
+@Component
+public class
RssMappingFactory { + + private final Map mappers = new HashMap<>(); + + /** + * AbstractRssMapper 구현체를 받아서 Mapper에 등록 + * + * @param availableMappers AbstractRssMapper List 목록 + */ + @Autowired + public RssMappingFactory(List availableMappers) { + for (AbstractRssMapper mapper : availableMappers) { + String mapperType = mapper.getMapperType(); + mappers.put(mapperType, mapper); + } + } + + /** + * 타입에 맞게 mapper 반환 + * + * @param type 매퍼 타입 (소문자 언론사 코드) + * @return 해당 타입의 RSS 매퍼 + * @throws IllegalArgumentException 지원하지 않는 타입인 경우 + */ + public AbstractRssMapper getMapper(String type) { + AbstractRssMapper mapper = mappers.get(type); + if (mapper == null) { + throw new IllegalArgumentException("Mapper 없음: " + type); + } + return mapper; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java new file mode 100644 index 0000000..8e9c976 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/DongaRssMapper.java @@ -0,0 +1,191 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.result.ScrapingResult; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import com.rometools.rome.feed.synd.SyndEntry; + +import groovy.util.logging.Slf4j; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * 동아일보 RSS 매퍼 구현체 + * HTML에서 문단을 추출하고 PARAGRAPH_BREAK로 구분하여 반환한다. + * + * @author 양병학 + * @since 2025-05-10 최초 작성 + * @modified 2025-05-15 템플릿 메서드 패턴 적용, 의존성 주입 방식 개선 + * @modified 2025-05-17 HTML 태그 제거 및 문단 구분 기능 추가 + */ +@Slf4j +@Component +public class DongaRssMapper extends AbstractRssMapper { + + private final ScraperFactory scraperFactory; + + @Autowired + public DongaRssMapper(ScraperFactory scraperFactory) { + this.scraperFactory = scraperFactory; + } + + /** + * 템플릿 메서드 패턴에서 사용할 ScraperFactory 반환 + * + * @return 주입받은 ScraperFactory 인스턴스 + * @since 2025-05-15 + */ + @Override + protected ScraperFactory getScraperFactory() { + return this.scraperFactory; + } + + /** + * 매퍼 타입 반환 + * + * @return 매퍼 타입 (da) + * @since 2025-05-10 + */ + @Override + public String getMapperType() { + return "da"; + } + + /** + * 카테고리 정보 추출 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @return 카테고리 + * @since 2025-05-10 + */ + @Override + protected String extractCategory(SyndEntry entry, RssSource source) { + return source.getCategoryName(); + } + + /** + * 본문만 스크래핑 하는 메소드 + * + * @return ScrapingResult 객체 (스크래핑 정보) + * @since 
2025-05-17 + */ + @Override + protected ScrapingResult performSpecificMapping( + SyndEntry entry, + RssSource source, + String link, + String baseDescription, + String baseImageUrl) { + + String scrapedContent = scrapeContent(link); + + return new ScrapingResult(scrapedContent, baseImageUrl); + } + + /** + * 본문 스크래핑을 진행하는 메소드 + * + * @return 스크래핑된 본문 String + * @author 양병학 + * @since 2025-05-17 + */ + private String scrapeContent(String link) { + ContentScraper scraper = getScraperOrThrow(); + String scrapedContent = scrapeAndValidateContent(scraper, link); + return removeUnwantedPhrases(scrapedContent); + } + + private ContentScraper getScraperOrThrow() { + return getScraperFactory().getScraper(getMapperType()) + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_NOT_FOUND)); + } + + private String scrapeAndValidateContent(ContentScraper scraper, String link) { + String scrapedContent = scraper.scrapeContent(link); + validateScrapedContent(scrapedContent); + return scrapedContent; + } + + private void validateScrapedContent(String content) { + Optional.ofNullable(content) + .filter(c -> !c.isEmpty()) + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_CONTENT)); + } + + /** + * 동아일보 링크에서 고유 ID 추출 + * + * @param link 기사 링크 + * @return 추출된 고유 ID + * @throws ArticleCollectorException 링크가 null이거나 ID를 추출할 수 없는 경우 + * @since 2025-05-10 + */ + @Override + protected String extractUniqueIdFromLink(String link) { + validateLink(link); + return extractIdFromParts(link); + } + + private void validateLink(String link) { + Optional.ofNullable(link) + .map(String::trim) + .filter(l -> !l.isEmpty()) + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR)); + } + + private String extractIdFromParts(String link) { + return Optional.of(link.split("/")) + .filter(parts -> parts.length >= 2) + .map(parts -> parts[parts.length - 2]) + .filter(this::isValidId) + 
.orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR)); + } + + private boolean isValidId(String id) { + return id != null && !id.trim().isEmpty(); + } + + /** + * 불용어 제거 메서드 + * 저작권 문구, 광고 문구 등 불필요한 문구 제거 + * + * @param content 원본 내용 + * @return 불용어가 제거된 내용 + * @since 2025-05-17 + */ + private static final List UNWANTED_PATTERNS = List.of( + "\\(c\\)\\s*동아일보", + "저작권자.*동아일보.*무단.*전재.*금지", + "무단전재 및 재배포 금지", + "\\S+기자\\s+\\S+@donga\\.com", + "동아닷컴 뉴스스탠드", + "동아일보 홈페이지", + "PARAGRAPH_BREAKPARAGRAPH_BREAK" + ); + + protected String removeUnwantedPhrases(String content) { + return Optional.ofNullable(content) + .filter(c -> !c.isEmpty()) + .map(c -> UNWANTED_PATTERNS.stream() + .reduce(c, (current, regex) -> current.replaceAll(regex, ""))) + .map(String::trim) + .orElse(""); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java new file mode 100644 index 0000000..8e2e197 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KhanRssMapper.java @@ -0,0 +1,303 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.result.ScrapingResult; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import com.rometools.rome.feed.synd.SyndEntry; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import java.time.LocalDateTime; + +/** + * 경향신문 RSS 매퍼 구현체 + * ContentScraper를 사용하여 기사 본문 스크래핑 + * + * @author 양병학 + * @since 2025-05-10 최초 작성 + * @modified 2025-05-15 템플릿 메서드 패턴 적용, 의존성 주입 방식 개선 + * @modified 2025-05-17 스크래핑 로직 추가 및 불용어 제거 기능 추가 + */ +@Component +public class KhanRssMapper extends AbstractRssMapper { + + private static final Logger log = LoggerFactory.getLogger(KhanRssMapper.class); + private final ScraperFactory scraperFactory; + + @Autowired + public KhanRssMapper(ScraperFactory scraperFactory) { + this.scraperFactory = scraperFactory; + } + + /** + * 매퍼 타입 반환 + * + * @return 매퍼 타입 (kh: 경향신문) + * @since 2025-05-10 + */ + @Override + public String getMapperType() { + return "kh"; + } + + /** + * 본문 + 이미지 링크를 스크래핑 하는 메소드 + * + * @return ScrapingResult 객체 (스크래핑 정보) + * @since 2025-05-17 + */ + @Override + protected ScrapingResult performSpecificMapping( + SyndEntry entry, + RssSource source, + String link, + String baseDescription, + String baseImageUrl) { + + ContentScraper scraper = getContentScraper(); + + String scrapedContent = scrapeAndProcessContent(scraper, link); + + String finalImageUrl = baseImageUrl; + if (finalImageUrl == null || finalImageUrl.isEmpty()) { + finalImageUrl = scrapeImageUrl(scraper, link); + } + + return new ScrapingResult(scrapedContent, 
finalImageUrl); + } + + /** + * 경향신문 스크래퍼 가져오기 + * + * @return 경향신문 ContentScraper + * @throws ArticleCollectorException 스크래퍼를 찾을 수 없는 경우 + * @since 2025-05-17 + */ + private ContentScraper getContentScraper() { + return scraperFactory.getScraper("kh") + .orElseThrow(() -> new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_NOT_FOUND)); + } + + /** + * 기사 콘텐츠 스크래핑 및 처리 + * + * @param scraper 사용할 ContentScraper + * @param link 기사 링크 + * @return 스크래핑되고 처리된 콘텐츠 + * @throws ArticleCollectorException 스크래핑 실패 시 (내용이 비어있음) + * @since 2025-05-17 + */ + private String scrapeAndProcessContent(ContentScraper scraper, String link) { + String scrapedContent = scrapeContent(scraper, link); + validateScrapedContent(scrapedContent); + return removeUnwantedPhrases(scrapedContent); + } + + private String scrapeContent(ContentScraper scraper, String link) { + return scraper.scrapeContent(link); + } + + private void validateScrapedContent(String content) { + if (null == content || content.isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_CONTENT); + } + } + + /** + * 이미지 URL 스크래핑 + * + * @param scraper 사용할 ContentScraper + * @param link 기사 링크 + * @return 스크래핑된 이미지 URL + * @throws ArticleCollectorException 스크래핑 실패 시 + * @since 2025-05-17 + */ + private String scrapeImageUrl(ContentScraper scraper, String link) { + String imageUrl = scraper.scrapeImageUrl(link); + validateImageUrl(imageUrl); + return imageUrl; + } + + private void validateImageUrl(String imageUrl) { + if (null == imageUrl || imageUrl.isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_IMAGE); + } + } + + + + /** + * 템플릿 메서드 패턴에서 사용할 ScraperFactory 반환 + * + * @return 주입받은 ScraperFactory 인스턴스 + * @since 2025-05-15 + */ + @Override + protected ScraperFactory getScraperFactory() { + return this.scraperFactory; + } + + /** + * 불용어 제거 메서드 + * 저작권 문구, 광고 문구 등 불필요한 문구 제거 + * + * @param content 원본 내용 + * @return 불용어가 제거된 내용 + * @since 
2025-05-17 + */ + private String removeUnwantedPhrases(String content) { + if (content == null || content.isEmpty()) { + return ""; + } + + content = content.replaceAll("\\(c\\)\\s*경향신문", ""); + content = content.replaceAll("저작권자.*경향신문.*무단.*전재.*금지", ""); + content = content.replaceAll("무단전재 및 재배포 금지", ""); + content = content.replaceAll("\\S+기자\\s+\\S+@khan\\.co\\.kr", ""); + content = content.replaceAll("경향신문 뉴스스탠드", ""); + content = content.replaceAll("경향닷컴", ""); + content = content.replaceAll("PARAGRAPH_BREAKPARAGRAPH_BREAK", "PARAGRAPH_BREAK"); + + return content.trim(); + } + + @Override + protected String extractUniqueIdFromLink(String link) { + validateLink(link); + + try { + String[] pathParts = splitLinkPath(link); + return findArticleIdInPath(pathParts); + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR, e); + } + } + + /** + * 링크 유효성 검사 + * + * @param link 검사할 링크 + * @throws ArticleCollectorException 링크가 null이거나 비어있는 경우 + * @since 2025-05-10 + */ + private void validateLink(String link) { + if (link == null || link.trim().isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); + } + } + + /** + * 링크를 경로 부분으로 분리 + * + * @param link 분리할 링크 + * @return 경로 부분 배열 + * @since 2025-05-10 + */ + private String[] splitLinkPath(String link) { + return link.split("/"); + } + + /** + * 경로 부분에서 기사 ID 찾기 + * + * @param pathParts 경로 부분 배열 + * @return 기사 ID + * @throws ArticleCollectorException 기사 ID를 찾을 수 없는 경우 + * @since 2025-05-10 + */ + private String findArticleIdInPath(String[] pathParts) { + for (int i = 0; i < pathParts.length - 1; i++) { + if (!"article".equals(pathParts[i])) { + continue; + } + + String id = pathParts[i + 1]; + if (isValidArticleId(id)) { + return id; + } + } + + throw new ArticleCollectorException(ArticleCollectorErrorCode.ITEM_MAPPING_ERROR); + } + + /** + * 기사 ID 유효성 검사 + * + * @param id 검사할 ID + * @return 유효성 여부 + * @since 2025-05-10 + */ + 
private boolean isValidArticleId(String id) { + return id != null && !id.trim().isEmpty(); + } + + /** + * 발행일 추출, 경향신문은 dc:date 태그 확인 + * + * @param entry RSS 항목 + * @return 발행일 LocalDateTime + * @since 2025-05-10 + */ + @Override + protected LocalDateTime extractPubDate(SyndEntry entry) { + if (entry.getPublishedDate() != null) { + return convertToLocalDateTime(entry.getPublishedDate()); + } + + return extractDcDate(entry); + } + + /** + * date 태그에서 발행일 추출 + * + * @param entry RSS 항목 + * @return 추출된 발행일, 없으면 현재 시간 + * @since 2025-05-10 + */ + private LocalDateTime extractDcDate(SyndEntry entry) { + return entry.getForeignMarkup().stream() + .filter(element -> "date".equals(element.getName()) && + "dc".equals(element.getNamespacePrefix())) + .findFirst() + .map(element -> parseDateTime(element.getValue())) + .orElse(LocalDateTime.now()); + } + + /** + * 문자열을 LocalDateTime으로 파싱 + * + * @param dateString 날짜 문자열 + * @return 파싱된 LocalDateTime, 실패 시 현재 시간 + * @since 2025-05-10 + */ + private LocalDateTime parseDateTime(String dateString) { + try { + return LocalDateTime.parse(dateString); + } catch (Exception e) { + return LocalDateTime.now(); + } + } + + /** + * 카테고리 정보 추출 + * + * @param entry RSS 항목 + * @param source RSS 소스 정보 + * @return 결합된 카테고리 문자열 + * @since 2025-05-10 + */ + @Override + protected String extractCategory(SyndEntry entry, RssSource source) { + return source.getCategoryName(); + } + + +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java new file mode 100644 index 0000000..3011a78 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/mapper/implement/KmibRssMapper.java @@ -0,0 +1,271 @@ +package 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.implement; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.config.batch.RssSource; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.mapper.AbstractRssMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.result.ScrapingResult; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory.ScraperFactory; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.HtmlParser; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.ParagraphUtil; +import com.rometools.rome.feed.synd.SyndEntry; + +import groovy.util.logging.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * 국민일보 RSS 매퍼 구현체 + * HTML 태그를 제거하고 문단을 PARAGRAPH_BREAK로 구분하여 반환한다. 
+ * + * @author 양병학 + * @since 2025-05-10 최초 작성 + * @modified 2025-05-15 템플릿 메서드 패턴 적용, 의존성 주입 방식 개선 + * @modified 2025-05-17 HTML 태그 제거 및 문단 구분 기능 추가 + */ +@Slf4j +@Component +public class KmibRssMapper extends AbstractRssMapper { + + private static final Pattern ARCID_PATTERN = Pattern.compile("arcid=([0-9]+)"); + private static final Pattern IMG_SRC_PATTERN = Pattern.compile(" paragraphs = htmlParser.extractCleanParagraphs(rawDescription); + validateParagraphs(paragraphs); + + return paragraphUtil.serializeParagraphs(paragraphs); + } + + /** + * 링크에서 arcid 값 추출 + * + * @param link 기사 링크 + * @return 추출된 arcid + * @throws ArticleCollectorException 링크가 null이거나 arcid 추출 실패 시 + * @since 2025-05-10 + */ + @Override + protected String extractUniqueIdFromLink(String link) { + validateLink(link); + + String arcId = extractArcIdFromLink(link); + validateArcId(arcId); + + return arcId; + } + + private void validateLink(String link) { + if (null == link || link.trim().isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR); + } + } + + private void validateArcId(String arcId) { + if (null == arcId || arcId.trim().isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR); + } + } + + /** + * 이미지 URL 추출 메서드 + * 국민일보 RSS feed는 media:content 태그 대신 description의 HTML 내 img 태그에서 이미지 URL 추출 + * + * @param entry RSS 항목 + * @return 이미지 URL + * @since 2025-05-10 + */ + @Override + protected String extractImageUrl(SyndEntry entry) { + String mediaContent = super.extractImageUrl(entry); + if (!mediaContent.isEmpty()) { + return mediaContent; + } + + return extractImageFromDescription(entry); + } + + private String extractImageFromDescription(SyndEntry entry) { + if (isEntryDescriptionEmpty(entry)) { + return ""; + } + + String description = entry.getDescription().getValue(); + if (isNullOrEmpty(description)) { + return ""; + } + + return extractImageUrlFromHtml(description); + } + + /** + * 
HTML에서 이미지 URL을 추출합니다. + * + * @param html 이미지 URL을 추출할 HTML 문자열 + * @return 추출된 이미지 URL 또는 빈 문자열 + * @since 2025-05-18 + * @author 양병학 + */ + private String extractImageUrlFromHtml(String html) { + Matcher matcher = IMG_SRC_PATTERN.matcher(html); + return matcher.find() ? matcher.group(1) : ""; + } + + /** + * RSS description에서 HTML 태그를 제거하고 문단을 추출하여 PARAGRAPH_BREAK로 구분 + * + * 1. 설명 컨텐츠 존재 여부 확인 + * 2. HTML 태그 제거 및 문단 분리 + * 3. 문단을 PARAGRAPH_BREAK로 구분하여 반환 + * + * @param entry RSS 항목 + * @return PARAGRAPH_BREAK로 구분된 문단 텍스트 + * @throws ArticleCollectorException 설명이 비어있거나 파싱 중 오류 발생 시 + * @since 2025-05-10 + * @author 양병학 + * @modified 2025-05-17 HTML 태그 제거 및 문단 구분 기능 추가 + * @modified 2025-05-18 예외 처리 로직 개선 + */ + @Override + protected String extractDescription(SyndEntry entry) { + validateDescriptionExists(entry); + + String rawDescription = entry.getDescription().getValue(); + validateRawDescription(rawDescription); + + try { + List paragraphs = htmlParser.extractCleanParagraphs(rawDescription); + validateParagraphs(paragraphs); + return paragraphUtil.serializeParagraphs(paragraphs); + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.RSS_PARSING_ERROR, e); + } + } + + private void validateDescriptionExists(SyndEntry entry) { + if (null == entry.getDescription()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.RSS_CONTENT_EMPTY); + } + } + + private void validateRawDescription(String rawDescription) { + if (null == rawDescription || rawDescription.isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.RSS_CONTENT_EMPTY); + } + } + + private void validateParagraphs(List paragraphs) { + if (paragraphs.isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.RSS_CONTENT_EMPTY); + } + } + + /** + * 링크에서 arcId를 추출합니다. 
+ * + * @param link arcId를 추출할 링크 + * @return 추출된 arcId + * @throws ArticleCollectorException arcId를 추출할 수 없는 경우 발생 + * @since 2025-05-18 + * @author 양병학 + */ + private String extractArcIdFromLink(String link) { + Matcher matcher = ARCID_PATTERN.matcher(link); + if (!matcher.find()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.ARTICLE_ID_EXTRACTION_ERROR); + } + return matcher.group(1); + } + + private boolean isEntryDescriptionEmpty(SyndEntry entry) { + return entry.getDescription() == null; + } + + /** + * 문자열이 null이거나 비어있는지 확인합니다. + * + * @param str 확인할 문자열 + * @return null이거나 비어있으면 true, 그렇지 않으면 false + */ + private boolean isNullOrEmpty(String str) { + return null == str || str.isEmpty(); + } + + +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/result/ScrapingResult.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/result/ScrapingResult.java new file mode 100644 index 0000000..1b53272 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/result/ScrapingResult.java @@ -0,0 +1,19 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.result; + +public class ScrapingResult { + private final String description; + private final String imageUrl; + + public ScrapingResult(String description, String imageUrl) { + this.description = description; + this.imageUrl = imageUrl; + } + + public String getDescription() { + return description; + } + + public String getImageUrl() { + return imageUrl; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java new file mode 100644 index 0000000..74e6511 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/ContentScraper.java @@ -0,0 +1,74 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import lombok.extern.slf4j.Slf4j; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +import java.io.IOException; +import java.util.List; + +/** + * 뉴스 본문 스크랩 interface + * 스크래핑 로직이 신문사 마다 다름 + * + * @author 양병학 + * @since 2025-05-13 최초 작성 + */ +public interface ContentScraper { + /** + * 뉴스 URL에서 본문 내용을 문단 단위로 스크래핑 + * + * @param url 뉴스 URL + * @return 문단 단위로 나눈 본문 리스트 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 + */ + List scrapeParagraphs(String url) throws ArticleCollectorException; + + /** + * 기사 URL에서 본문 내용을 PARAGRAPH_BREAK로 구분된 문자열로 스크래핑 + * + * @param url 기사 URL + * @return PARAGRAPH_BREAK로 구분된 본문 문자열 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 + */ + String scrapeContent(String url) throws ArticleCollectorException; + + /** + * 뉴스 URL에서 이미지 URL을 스크래핑 + * + * @param url 뉴스 URL + * @return 스크래핑된 이미지 URL + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 + */ + String scrapeImageUrl(String url) throws ArticleCollectorException; + + /** + * 스크래퍼가 지원하는 Mapper type 반환 + * + * @return Mapper Type 영문 2자 (예: "km", "da") + */ + String getSupportedMapperType(); + + /** + * URL에 연결하여 Document 객체 반환 (기본 구현) + * + * @param url 연결할 URL + * @return 파싱된 JSoup Document + * @throws ArticleCollectorException 연결 오류 발생 시 SCRAPER_CONNECTION_ERROR 예외 발생 + */ + default Document connectToUrl(String url) throws 
ArticleCollectorException { + try { + return Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") + .timeout(5000) + .ignoreContentType(true) + .maxBodySize(1024 * 1024) + .followRedirects(true) + .get(); + } catch (IOException e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_CONNECTION_ERROR, e); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java new file mode 100644 index 0000000..91cebdf --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/factory/ScraperFactory.java @@ -0,0 +1,52 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.factory; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; + +/** + * 뉴스 스크래퍼 사용하는 factory Class + * + * @author 양병학 + * @since 2025-05-13 최초 작성 + */ +@Component +public class ScraperFactory { + + private final Map scrapers = new HashMap<>(); + + /** + * ContentScraper 구현체 등록 + * + * @param availableScrapers ContentScraper 목록 + */ + @Autowired + public ScraperFactory(List availableScrapers) { + for (ContentScraper scraper : 
availableScrapers) { + String mapperType = scraper.getSupportedMapperType(); + scrapers.put(mapperType, scraper); + } + } + + /** + * Mapper Type에 맞는 스크래퍼 반환 + * + * @param mapperType 매퍼 타입 (예: "km", "da") + * @return 해당 타입의 스크래퍼 or null일시 Optional로 빈 값 반환 + */ + public Optional getScraper(String mapperType) { + ContentScraper scraper = scrapers.get(mapperType); + if (scraper == null) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_NOT_FOUND); + } + return Optional.of(scraper); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java new file mode 100644 index 0000000..e44980a --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/DongaContentScraper.java @@ -0,0 +1,254 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.implement; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.HtmlScraperUtils; + +import lombok.extern.slf4j.Slf4j; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * 동아일보 기사 본문 스크래퍼 구현체 + * + * 
@author 양병학 + * @since 2025-05-13 최초 작성 + * @modified 2025-05-17 동아일보 스포츠 기사 스크래핑 기능 추가 + */ +@Slf4j +@Component +public class DongaContentScraper implements ContentScraper { + + /** + * 동아일보 기사 URL에서 본문 내용, 문단 단위로 스크래핑 + * + * @param url 기사 URL + * @return 문단 단위로 문단 텍스트 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 + */ + @Override + public List scrapeParagraphs(String url) throws ArticleCollectorException { + Document document = connectToUrl(url); + List content = extractContent(url, document); + validateContent(content); + return content; + } + + /** + * URL과 문서에 따라 적절한 콘텐츠 추출 메서드를 호출 + * + * @param url 기사 URL + * @param document 파싱된 JSoup Document + * @return 추출된 문단 리스트 + * @throws ArticleCollectorException 콘텐츠 추출 중 오류 발생 시 + */ + private List extractContent(String url, Document document) throws ArticleCollectorException { + if (isSportsArticle(url, document)) { + return extractDongaSportsContent(document); + } + return extractDongaContent(document); + } + + /** + * 추출된 콘텐츠의 유효성 검증 + * + * @param content 추출된 문단 리스트 + * @throws ArticleCollectorException 콘텐츠가 비어있거나 유효하지 않을 때 + */ + private void validateContent(List content) throws ArticleCollectorException { + if (content == null || content.isEmpty() || content.stream().allMatch(String::isEmpty)) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_CONTENT); + } + } + + /** + * URL 또는 문서 구조를 기반으로 스포츠 기사인지 확인 + * + * @param url 기사 URL + * @param document 파싱된 JSoup Document + * @return 스포츠 기사 여부 + */ + private boolean isSportsArticle(String url, Document document) { + if (url.contains("sports.donga.com") || url.contains("/sports/")) { + return true; + } + + Element articleWord = document.selectFirst("div.article_word#article_body"); + return articleWord != null; + } + + /** + * 동아일보 일반 기사 본문 추출 (section.news_view에서 h2, figure 제외) + * + * @param document JSoup Document + * @return 문단 리스트 + * @throws ArticleCollectorException 본문 파싱 중 오류 발생 시 + */ + private List 
extractDongaContent(Document document) throws ArticleCollectorException { + try { + Element newsView = findNewsViewElement(document); + if (null == newsView) { + return new ArrayList<>(); + } + + // HTML 처리 및 불필요한 태그 제거 + Element processedView = processHtmlElement(newsView); + + // 전체 텍스트 추출 + String fullText = processedView.text(); + + // 문단 추출 + List paragraphs = extractParagraphsFromText(fullText); + + // 문단이 없는 경우 전체 텍스트를 하나의 문단으로 처리 + if (paragraphs.isEmpty() && !fullText.trim().isEmpty()) { + paragraphs.add(fullText.trim()); + } + + return paragraphs; + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_PARSING_ERROR, e); + } + } + + /** + * 문서에서 뉴스 본문 영역 찾기 + * + * @param document JSoup Document + * @return 뉴스 본문 Element + */ + private Element findNewsViewElement(Document document) { + return HtmlScraperUtils.findElement(document, "section.news_view"); + } + + /** + * HTML 요소 처리 -
태그를 문단 구분자로 변환하고 불필요한 태그 제거 + * + * @param element 처리할 HTML 요소 + * @return 처리된 HTML 요소 + */ + private Element processHtmlElement(Element element) { + String html = element.html(); + html = html.replaceAll("", "PARAGRAPH_BREAK"); + Element parsedElement = Jsoup.parse(html).body(); + + return HtmlScraperUtils.removeTags(parsedElement, "h2", "figure", "img"); + } + + /** + * 텍스트에서 문단 추출 + * + * @param text 전체 텍스트 + * @return 문단 리스트 + */ + private List extractParagraphsFromText(String text) { + String[] paragraphsArray = text.split("PARAGRAPH_BREAK"); + + return Arrays.stream(paragraphsArray) + .map(String::trim) + .filter(p -> !p.isEmpty()) + .collect(Collectors.toList()); + } + + /** + * 동아일보 스포츠 기사 본문 추출 (div.article_word#article_body) + * + * @param document JSoup Document + * @return 문단 리스트 + * @throws ArticleCollectorException 본문 파싱 중 오류 발생 시 + */ + private List extractDongaSportsContent(Document document) throws ArticleCollectorException { + // 시도할 선택자들을 배열로 정의 + String[] selectors = { + "div.article_word#article_body", + "div.article_word" + }; + + Element articleBody = Arrays.stream(selectors) + .map(selector -> HtmlScraperUtils.findElement(document, selector)) + .filter(Objects::nonNull) + .findFirst() + .orElse(null); + + if (articleBody == null) { + return new ArrayList<>(); + } + + try { + String[] selectorsToRemove = { + "div.photoAd", + "div.subcont_ad01", + "div.view_center", + "p.copyright" + }; + + Arrays.stream(selectorsToRemove) + .forEach(selector -> articleBody.select(selector).remove()); + + String fullText = processHtmlAndExtractText( + articleBody, "img", "script", "style"); + + return Arrays.stream(fullText.split("PARAGRAPH_BREAK")) + .map(String::trim) + .filter(p -> !p.isEmpty()) + .collect(Collectors.toList()); + + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_PARSING_ERROR, e); + } + } + + private String processHtmlAndExtractText(Element element, String... 
tagsToRemove) { + String html = element.html(); + html = html.replaceAll("", "PARAGRAPH_BREAK"); + Element parsedElement = Jsoup.parse(html).body(); + + Element processedElement = HtmlScraperUtils.removeTags(parsedElement, tagsToRemove); + + return processedElement.text(); + } + + /** + * 동아일보 기사 URL에서 본문 내용을 PARAGRAPH_BREAK로 구분된 문자열로 스크래핑 + * + * @param url 기사 URL + * @return PARAGRAPH_BREAK로 구분된 본문 문자열 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 + */ + @Override + public String scrapeContent(String url) throws ArticleCollectorException { + List paragraphs = scrapeParagraphs(url); + return String.join("PARAGRAPH_BREAK", paragraphs); + } + + /** + * 동아일보 RSS에서 이미지를 가져오므로 빈 문자열 반환 (구현 필요없음) + * + * @param url 기사 URL + * @return 빈 문자열 + * @throws ArticleCollectorException 사용되지 않음 + */ + @Override + public String scrapeImageUrl(String url) throws ArticleCollectorException { + return ""; + } + + /** + * 지원하는 매퍼 타입 반환 + * + * @return 동아일보 매퍼 타입 (da) + */ + @Override + public String getSupportedMapperType() { + return "da"; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java new file mode 100644 index 0000000..d2edba1 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/scraper/implement/KhanContentScraper.java @@ -0,0 +1,267 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.implement; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.scraper.ContentScraper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util.HtmlScraperUtils; + +import lombok.extern.slf4j.Slf4j; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +/** + * 경향신문 기사 본문 스크래퍼 구현체 + * + * @author 양병학 + * @since 2025-05-13 최초 작성 + */ +@Slf4j +@Component +public class KhanContentScraper implements ContentScraper { + + /** + * 경향신문 기사 URL에서 본문 내용을 문단 단위로 스크래핑 + * + * @param url 기사 URL + * @return 문단 단위로 나눈 본문 리스트 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 + */ + @Override + public List scrapeParagraphs(String url) throws ArticleCollectorException { + Document document = connectToUrl(url); + List content = extractKhanContent(document); + + if (content == null || content.isEmpty() || content.stream().allMatch(String::isEmpty)) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_CONTENT); + } + + return content; + } + + /** + * 경향신문 본문 추출 (article.art_body에서 h3, div.art_photo 제외) + * + * @param document JSoup Document + * @return 문단 리스트 + * @throws ArticleCollectorException 본문 파싱 중 오류 발생 시 + */ + private List extractKhanContent(Document document) throws ArticleCollectorException { + try { + List selectors = List.of( + "article.art_body", + "div.art_body", + "div.article_view", + "div.article-body" + ); + // 가장 먼저 매칭되는 Element 하나만 찾기 + Element artBody = selectors.stream() + .map(sel -> HtmlScraperUtils.findElement(document, sel)) + .filter(Objects::nonNull) + .findFirst() + .orElse(null); + // 못 찾았으면 빈 리스트 반환 + if (artBody == null) { + return new ArrayList<>(); + } + // 찾았으면 실제 파싱 로직 호출 + return extractKhanContentFromElement(artBody); + } catch (Exception e) { + 
throw new ArticleCollectorException( + ArticleCollectorErrorCode.SCRAPER_PARSING_ERROR, e + ); + } + } + + /** + * 경향신문 본문 요소에서 콘텐츠 추출 + * + * @param artBody 기사 본문 요소 + * @return 문단 리스트 + * @throws ArticleCollectorException 본문 요소 처리 중 오류 발생 시 + */ + private List extractKhanContentFromElement(Element artBody) throws ArticleCollectorException { + try { + Element processedBody = HtmlScraperUtils.removeTags(artBody, "h3", "div.art_photo", "img"); + + processedBody.select("*").forEach(el -> { + el.removeAttr("align"); + el.removeAttr("vspace"); + el.removeAttr("hspace"); + el.removeAttr("style"); + el.removeAttr("width"); + el.removeAttr("height"); + }); + + Elements paragraphs = processedBody.select("p"); + + if (paragraphs.isEmpty()) { + paragraphs = processedBody.select("div.article_paragraph"); + } + + if (paragraphs.isEmpty()) { + paragraphs = processedBody.select("span.article_text"); + } + + if (paragraphs.isEmpty()) { + List fallback = new ArrayList<>(); + String fullText = processedBody.text().trim(); + if (!fullText.isEmpty()) { + fallback.add(fullText); + } + return fallback; + } + + List result = new ArrayList<>(); + for (Element p : paragraphs) { + String text = p.text().trim(); + if (!text.isEmpty()) { + result.add(text); + } + } + + return result; + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_PARSING_ERROR, e); + } + } + + /** + * 경향신문 기사 URL에서 본문 내용을 PARAGRAPH_BREAK로 구분된 문자열로 스크래핑 + * + * @param url 기사 URL + * @return PARAGRAPH_BREAK로 구분된 본문 문자열 + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 + */ + @Override + public String scrapeContent(String url) throws ArticleCollectorException { + Document document = connectToUrl(url); + List paragraphs = extractKhanContent(document); + + if (paragraphs == null || paragraphs.isEmpty() || paragraphs.stream().allMatch(String::isEmpty)) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_CONTENT); + } + + return 
String.join("PARAGRAPH_BREAK", paragraphs); + } + + /** + * 경향신문 기사 URL에서 이미지 URL을 스크래핑 + * + * @param url 기사 URL + * @return 스크래핑된 이미지 URL + * @throws ArticleCollectorException 스크래핑 중 오류 발생 시 + */ + @Override + public String scrapeImageUrl(String url) throws ArticleCollectorException { + Document document = connectToUrl(url); + String imageUrl = extractImageUrlFromDocument(document); + + if (imageUrl == null || imageUrl.isEmpty()) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.EMPTY_ARTICLE_IMAGE); + } + + return imageUrl; + } + + /** + * 문서에서 이미지 URL을 추출합니다. + * 여러 선택자를 순차적으로 시도하여 첫 번째로 발견된 유효한 이미지 URL을 반환합니다. + * + * @param document 이미지를 추출할 JSoup Document + * @return 추출된 이미지 URL 또는 빈 문자열 + * @throws ArticleCollectorException 파싱 중 오류 발생 시 + * @since 2025-05-18 + * @author 양병학 + */ + private String extractImageUrlFromDocument(Document document) throws ArticleCollectorException { + try { + String metaImageUrl = extractMetaImageUrl(document); + if (!metaImageUrl.isEmpty()) { + return metaImageUrl; + } + + return extractImageUrlFromSelectors(document); + } catch (Exception e) { + throw new ArticleCollectorException(ArticleCollectorErrorCode.SCRAPER_PARSING_ERROR, e); + } + } + + /** + * 메타 태그에서 이미지 URL을 추출합니다. + * + * @param document 이미지를 추출할 JSoup Document + * @return 추출된 이미지 URL 또는 빈 문자열 + */ + private String extractMetaImageUrl(Document document) { + Element metaImg = document.selectFirst("meta[property=og:image]"); + if (metaImg != null && !metaImg.attr("content").isEmpty()) { + return metaImg.attr("content"); + } + return ""; + } + + /** + * 다양한 이미지 선택자를 시도하여 이미지 URL을 추출합니다. 
+ * + * @param document 이미지를 추출할 JSoup Document + * @return 추출된 이미지 URL 또는 빈 문자열 + */ + private String extractImageUrlFromSelectors(Document document) { + List simpleSelectors = List.of( + "picture img", + "article.art_body img", + "div.art_photo img", + "figure img", + "img" + ); + + return simpleSelectors.stream() + .map(document::selectFirst) + .filter(Objects::nonNull) + .filter(img -> !img.attr("src").isEmpty()) + .map(img -> img.attr("abs:src")) + .findFirst() + .orElseGet(() -> { + Element source = document.selectFirst("picture source"); + if (source != null && !source.attr("srcset").isEmpty()) { + return extractSourceSetImageUrl(source); + } + return ""; + }); + } + + /** + * source 태그의 srcset 속성에서 이미지 URL을 추출합니다. + * + * @param source srcset 속성을 가진 source 요소 + * @return 추출된 이미지 URL 또는 빈 문자열 + */ + private String extractSourceSetImageUrl(Element source) { + String srcset = source.attr("srcset"); + String[] sources = srcset.split(","); + + if (sources.length > 0) { + String firstSource = sources[0].trim().split("\\s+")[0]; + return source.absUrl("srcset").isEmpty() ? 
firstSource : source.absUrl("srcset"); + } + + return ""; + } + + /** + * 지원하는 매퍼 타입 반환 + * + * @return 경향신문 매퍼 타입 (kh) + */ + @Override + public String getSupportedMapperType() { + return "kh"; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java new file mode 100644 index 0000000..91ba8b5 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlParser.java @@ -0,0 +1,71 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util; + +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** + * HTML 파싱 및 문단 추출을 처리하는 유틸리티 클래스 + * + * @author 양병학 + * @since 2025-05-18 + */ +@Component +public class HtmlParser { + + /** + * HTML 문자열에서 모든 태그를 제거하고 문단을 추출 + * + * @param html HTML 문자열 + * @return 정제된 문단 리스트 + */ + public List extractCleanParagraphs(String html) { + if (html == null || html.isEmpty()) { + return new ArrayList<>(); + } + + try { + String withBreaks = html.replaceAll("", "PARAGRAPH_BREAK"); + String noTags = withBreaks.replaceAll("<[^>]*>", ""); + String decoded = decodeHtmlEntities(noTags); + + decoded = decoded.replaceAll("\\s+", " ").trim(); + String[] paragraphs = decoded.split("PARAGRAPH_BREAK"); + + return Arrays.stream(paragraphs) + .map(String::trim) + .filter(p -> !p.isEmpty()) + .collect(Collectors.toList()); + } catch (Exception e) { + List fallback = new ArrayList<>(); + fallback.add(removeAllHtmlTags(html)); + return fallback; + } + } + + private String removeAllHtmlTags(String html) { + if (html == null || html.isEmpty()) { + return ""; + } + + String noTags = 
html.replaceAll("<[^>]*>", ""); + return decodeHtmlEntities(noTags).replaceAll("\\s+", " ").trim(); + } + + private String decodeHtmlEntities(String text) { + if (text == null || text.isEmpty()) { + return ""; + } + + return text.replace(" ", " ") + .replace(" ", " ") + .replace("<", "<") + .replace(">", ">") + .replace("&", "&") + .replace(""", "\"") + .replace("'", "'"); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlScraperUtils.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlScraperUtils.java new file mode 100644 index 0000000..95d077a --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/HtmlScraperUtils.java @@ -0,0 +1,205 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** + * HTML 태그별 스크래핑 유틸리티 클래스 + * + * @author 양병학 + * @since 2025-05-13 최초 작성 + */ +@Component +public class HtmlScraperUtils { + + /** + * 지정된 CSS 선택자에 해당하는 요소 찾기 + * + * @param document JSoup Document + * @param selector CSS 선택자 + * @return 찾은 요소, 없으면 null + */ + public static Element findElement(Document document, String selector) { + return null != document ? document.selectFirst(selector) : null; + } + + /** + * 지정된 CSS 선택자에 해당하는 모든 요소 찾기 + * + * @param document JSoup Document + * @param selector CSS 선택자 + * @return 찾은 요소들의 목록 + */ + public static Elements findElements(Document document, String selector) { + return null != document ? 
document.select(selector) : new Elements(); + } + + /** + * 특정 요소에서 지정된 태그들 제거 + * + * @param element 처리할 요소 + * @param tagsToRemove 제거할 태그 목록 (예: "h2", "figure") + * @return 태그가 제거된 요소 (원본은 변경되지 않음) + */ + public static Element removeTags(Element element, String... tagsToRemove) { + return null == element ? null : doRemoveTags(element, tagsToRemove); + } + + private static Element doRemoveTags(Element element, String... tagsToRemove) { + Element clone = element.clone(); + + for (String tag : tagsToRemove) { + clone.select(tag).remove(); + } + + return clone; + } + + /** + * 요소에서 텍스트 추출 (HTML 태그 제거) + * + * @param element 추출할 요소 + * @return 추출된 텍스트, 요소가 null이면 빈 문자열 + */ + public static String extractText(Element element) { + return null != element ? element.text() : ""; + } + + /** + * 여러 요소에서 텍스트 추출하여 결합 + * + * @param elements 처리할 요소들 + * @param separator 텍스트 사이에 넣을 구분자 (예: "\n\n") + * @return 결합된 텍스트 + */ + public static String extractCombinedText(Elements elements, String separator) { + return null != elements && !elements.isEmpty() + ? String.join(separator, elements.stream().map(Element::text).collect(Collectors.toList())) + : ""; + } + + /** + * p 태그의 내용을 개별 문단으로 추출 + * + * @param container p 태그를 포함하는 요소 + * @return 각 p 태그의 내용을 담은 문단 리스트 + */ + public static List extractParagraphs(Element container) { + return null == container ? 
new ArrayList<>() : doParagraphExtraction(container); + } + + /** + * p 태그 추출 실제 로직 + * + * @param container p 태그를 포함하는 요소 + * @return 추출된 문단 리스트 + */ + private static List doParagraphExtraction(Element container) { + Elements paragraphs = container.select("p"); + return paragraphs.stream() + .map(Element::text) + .filter(text -> !text.trim().isEmpty()) + .collect(Collectors.toList()); + } + + /** + * p 태그의 텍스트를 결합하여 추출 + * + * @param container p 태그를 포함하는 요소 + * @param separator 텍스트 사이에 넣을 구분자 (기본값: "\n\n") + * @return 결합된.텍스트 + */ + public static String extractParagraphText(Element container, String separator) { + return null != container + ? extractCombinedText(container.select("p"), separator) + : ""; + } + + /** + * p 태그의 텍스트를 줄바꿈으로 결합하여 추출 (기본 구분자: "\n\n") + * + * @param container p 태그를 포함하는 요소 + * @return 결합된 텍스트 + */ + public static String extractParagraphText(Element container) { + return extractParagraphText(container, "\n\n"); + } + + /** + * 이미지 URL 추출 + * + * @param document JSoup Document + * @param imgSelector 이미지 선택자 + * @return 이미지 URL, 없으면 빈 문자열 + */ + public static String extractImageUrl(Document document, String imgSelector) { + Element img = findElement(document, imgSelector); + return null != img ? img.absUrl("src") : ""; + } + + /** + * HTML 요소 내용 처리 - 공통 메서드 + * + * @param document JSoup Document + * @param selector 요소 선택자 + * @param excludeTags 제외할 태그 목록 + * @return 처리된 텍스트 + */ + public static String processElement(Document document, String selector, String... excludeTags) { + Element element = findElement(document, selector); + return null != element + ? extractText(removeTags(element, excludeTags)) + : ""; + } + + /** + * section 태그 내용 처리 + * + * @param document JSoup Document + * @param sectionSelector section 태그 선택자 + * @param excludeTags 제외할 태그 목록 + * @return 처리된 텍스트 + */ + public static String processSection(Document document, String sectionSelector, String... 
excludeTags) { + Element section = findElement(document, sectionSelector); + return null != section + ? extractText(removeTags(section, excludeTags)) + : ""; + } + + /** + * article 태그 내용 처리 + * + * @param document JSoup Document + * @param articleSelector article 태그 선택자 + * @param excludeTags 제외할 태그 목록 + * @return 처리된 텍스트 + */ + public static String processArticle(Document document, String articleSelector, String... excludeTags) { + Element article = findElement(document, articleSelector); + return null != article + ? extractText(removeTags(article, excludeTags)) + : ""; + } + + /** + * div 태그 내용 처리 + * + * @param document JSoup Document + * @param divSelector div 태그 선택자 + * @param excludeTags 제외할 태그 목록 + * @return 처리된 텍스트 + */ + public static String processDiv(Document document, String divSelector, String... excludeTags) { + Element div = findElement(document, divSelector); + return null != div + ? extractText(removeTags(div, excludeTags)) + : ""; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java new file mode 100644 index 0000000..1233af4 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/support/util/ParagraphUtil.java @@ -0,0 +1,37 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.support.util; + +import org.springframework.stereotype.Component; + +import java.util.List; + +/** + * 문단 처리 유틸리티 클래스 + * + * @author 양병학 + * @since 2025-05-18 + */ +@Component +public class ParagraphUtil { + + /** + * 문단 리스트를 구분자로 연결된 문자열로 직렬화 + * + * @param paragraphs 문단 리스트 + * @return 직렬화된 문자열 + */ + public String serializeParagraphs(List paragraphs) { + if (paragraphs == null || paragraphs.isEmpty()) { + return ""; + } + + 
StringBuilder sb = new StringBuilder(); + for (int i = 0; i < paragraphs.size(); i++) { + if (i > 0) { + sb.append("PARAGRAPH_BREAK"); + } + sb.append(paragraphs.get(i)); + } + + return sb.toString(); + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java new file mode 100644 index 0000000..ad5c8d4 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/collector/writer/ArticleWriter.java @@ -0,0 +1,203 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.collector.writer; + +import java.util.List; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import org.springframework.batch.item.Chunk; +import org.springframework.batch.item.ItemWriter; +import org.springframework.dao.DataIntegrityViolationException; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import static java.util.Map.entry; + +/** + * 기사 데이터를 DB에 저장하는 Spring Batch ItemWriter 구현체. + * 중복된 링크는 저장하지 않으며, 새롭게 저장된 기사 수를 로그로 출력한다. + * 매퍼에서 전달된 문단 구분자(PARAGRAPH_BREAK)를 기준으로 문단을 분리하고 직렬화한다. 
+ * + * - 입력: 기사 리스트(List) + * - 처리: 문단 분리, 직렬화, 중복 여부 확인 후 저장 + * - 출력: 로그 출력 (중복 제외) + * + * @since 2025-05-10 + * @modified 2025-05-15 직렬화 확인 로직 추가 + * @modified 2025-05-16 PARAGRAPH_BREAK 기반 문단 처리 추가 + */ +@Slf4j +@Component +@RequiredArgsConstructor +public class ArticleWriter implements ItemWriter> { + + private final NewsInfoJpaRepository newsInfoJpaRepository; + private static final String PARAGRAPH_BREAK = "PARAGRAPH_BREAK"; + + /** + * 기사 리스트를 저장하며, 중복된 기사는 건너뛴다. + * 저장 성공 시 개수를 집계하고, 로그로 남긴다. + * + * @param chunk Spring Batch가 전달하는 기사 리스트 Chunk + * @since 2025-05-10 + * @author 함예정 + */ + @Override + public void write(Chunk> chunk) { + AtomicInteger savedCount = new AtomicInteger(); + chunk.getItems().stream() + .flatMap(List::stream) + .peek(this::processAndSerializeDescription) + .filter(item -> !newsInfoJpaRepository.existsByLink(item.getLink())) + .forEach(item -> {saveItem(item, savedCount);}); + + log.info("새로 저장된 뉴스 개수: {}", savedCount.get()); + } + + /** + * 설명(description) 필드를 처리하고 JSON 형식으로 직렬화 + * 매퍼에서 전달된 PARAGRAPH_BREAK를 기준으로 문단을 분리하기 + * + * @param item 처리할 ArticleEntity 객체 + */ + private void processAndSerializeDescription(ArticleEntity item) { + String description = item.getDescription(); + if (description == null || description.isEmpty()) { + item.setDescription("[]"); + return; + } + + if (isAlreadyJsonFormat(description)) { + return; + } + + List paragraphs = splitIntoParagraphs(description); + String jsonDescription = serializeToJson(paragraphs); + item.setDescription(jsonDescription); + + } + + /** + * 문자열이 JSON 형식인지 확인 + * + * @param text 확인할 문자열 + * @return JSON 형식이면 true, 아니면 false + */ + private boolean isAlreadyJsonFormat(String text) { + return text.trim().startsWith("[") && text.trim().endsWith("]"); + } + + /** + * 텍스트를 PARAGRAPH_BREAK를 기준으로 문단으로 분리 + * + * @param text 분리할 텍스트 + * @return 분리된 문단 리스트 + */ + private List splitIntoParagraphs(String text) { + if (!text.contains(PARAGRAPH_BREAK)) { + log.warn("PARAGRAPH_BREAK 구분자가 
없는 텍스트 감지: {}", + text.substring(0, Math.min(text.length(), 50)) + "..."); + return Arrays.asList(text); + } + + String[] paragraphArray = text.split(PARAGRAPH_BREAK); + List paragraphs = new ArrayList<>(); + + for (String paragraph : paragraphArray) { + String trimmed = paragraph.trim(); + if (!trimmed.isEmpty()) { + paragraphs.add(trimmed); + } + } + + if (paragraphs.isEmpty()) { + paragraphs.add(text); + } + + return paragraphs; + } + + /** + * 문단 리스트를 JSON으로 직렬화한다. + * + * @param paragraphs 직렬화할 문단 리스트 + * @return JSON 형식의 문자열 + */ + private String serializeToJson(List paragraphs) { + if (paragraphs == null || paragraphs.isEmpty()) { + return "[]"; + } + + StringBuilder sb = new StringBuilder("["); + for (int i = 0; i < paragraphs.size(); i++) { + String paragraph = paragraphs.get(i); + String escaped = escapeJsonString(paragraph); + sb.append("\"").append(escaped).append("\""); + if (i < paragraphs.size() - 1) { + sb.append(","); + } + } + sb.append("]"); + + return sb.toString(); + } + + private static final Map JSON_ESCAPES = Map.ofEntries( + entry('\"', "\\\""), + entry('\\', "\\\\"), + entry('/', "\\/"), + entry('\b', "\\b"), + entry('\f', "\\f"), + entry('\n', "\\n"), + entry('\r', "\\r"), + entry('\t', "\\t") + ); + + /** + * JSON 문자열 이스케이프 처리 + * + * @param input 이스케이프할 문자열 + * @return 이스케이프된 문자열 + */ + protected String escapeJsonString(String input) { + if (input == null) { + return ""; + } + return input.chars() + .mapToObj(cp -> { + char c = (char) cp; + if (JSON_ESCAPES.containsKey(c)) { + return JSON_ESCAPES.get(c); + } + if (cp < 0x20) { + return String.format("\\u%04x", cp); + } + return String.valueOf(c); + }) + .collect(Collectors.joining()); + } + + /** + * DB에 뉴스를 저장하고, 저장된 개수를 증가시킨다. 
+ * + * @param item 저장할 뉴스 + * @param savedCount 저장된 갯수 + * @author 함예정 + * @since 2025-05-12 + */ + private void saveItem(ArticleEntity item, AtomicInteger savedCount) { + try { + newsInfoJpaRepository.save(item); + savedCount.incrementAndGet(); + } catch (DataIntegrityViolationException e) { + log.debug("중복 항목 감지: {}", item.getLink()); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingJobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingJobConfig.java new file mode 100644 index 0000000..ea25f74 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingJobConfig.java @@ -0,0 +1,52 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.config; + +import org.springframework.batch.core.Job; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.job.builder.JobBuilder; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * Spring Batch Job 설정 클래스.

+ * 요약된 뉴스를 OpenAI를 활용해 + * Embedding Vector를 계산하는 + * Batch Job 을 정의하며, 파티셔닝된 Step 을 시작 단계로 구성한다.

+ * + * 이 잡은 {@code articleEmbeddingJob}이라는 이름으로 정의되며, + * {@link Step} 객체는 외부에서 주입받아 사용한다.

+ * + * 해당 Job 은 Quartz 또는 Spring Scheduler 를 통해 주기적으로 실행될 수 있다.

+ * + * @since 2025-05-17 + */ +@Configuration +public class ArticleEmbeddingJobConfig { + private final String jobName = "articleEmbeddingJob"; + private final JobRepository jobRepository; + private final Step articleEmbeddingStep; + + public ArticleEmbeddingJobConfig( + JobRepository jobRepository, + Step articleEmbeddingStep) { + + this.jobRepository = jobRepository; + this.articleEmbeddingStep = articleEmbeddingStep; + } + + /** + * 뉴스 요약 정보를 임베딩 Vector로 계산하는 + * Spring Batch Job Bean을 생성한다. + * 파티셔닝 Step 을 실행하도록 구성한다. + * + * @return 뉴스 임베딩 Job + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Job articleEmbeddingJob() { + return new JobBuilder(jobName, jobRepository) + .start(articleEmbeddingStep) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingStepConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingStepConfig.java new file mode 100644 index 0000000..cdb7cf9 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/config/ArticleEmbeddingStepConfig.java @@ -0,0 +1,101 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.config; + +import org.springframework.batch.core.Step; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.batch.core.step.builder.StepBuilder; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.task.TaskExecutor; +import org.springframework.transaction.PlatformTransactionManager; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.EmbeddingException; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.partitioner.ArticleEmbeddingPartitioner; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.processor.ArticleEmbeddingProcessor; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.reader.ArticleEmbeddingPageReader; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.writer.ArticleEmbeddingWriter; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.exception.JpaConvertorException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +/** + * 뉴스 기사 임베딩 작업을 위한 Spring Batch Step 설정 클래스. + * - 총 페이지 수 계산을 위한 tasklet step + * - 마스터-슬레이브 파티셔닝 기반 임베딩 처리 step 구성 + * + * @since 2025-05-17 + */ +@Configuration +public class ArticleEmbeddingStepConfig { + private final String executorName = "normalExecutor"; + private final int gridSize = 5; + private final int chunkSize = 100; + private final int retryLimit = 3; + private final int skipLimit = 100; + + private final JobRepository jobRepository; + private final PlatformTransactionManager transactionManager; + private final TaskExecutor taskExecutor; + + public ArticleEmbeddingStepConfig( + JobRepository jobRepository, + PlatformTransactionManager platformTransactionManager, + @Qualifier(executorName) TaskExecutor taskExecutor) { + this.jobRepository = jobRepository; + this.transactionManager = platformTransactionManager; + this.taskExecutor = taskExecutor; + } + + /** + * 기사 임베딩 처리를 마스터-슬레이브 구조로 병렬 처리하기 위한 마스터 Step을 생성한다. 
+ * + * @param partitioner 파티셔닝 전략 구현체 + * @param articleEmbeddingSlaveStep 실제 데이터 처리를 수행하는 슬레이브 Step + * @return 마스터 파티셔닝 Step + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Step articleEmbeddingStep( + ArticleEmbeddingPartitioner partitioner, + Step articleEmbeddingSlaveStep) { + + return new StepBuilder("articleEmbeddingStep", jobRepository) + .partitioner("articleEmbeddingSlavePart", partitioner) + .step(articleEmbeddingSlaveStep) + .taskExecutor(taskExecutor) + .gridSize(gridSize) + .build(); + } + + /** + * 한 파티션 내에서 요약된 뉴스 내용을 기준으로 임베딩 벡터를 생성하고 + * DB에 저장하는 슬레이브 Step을 생성한다. + * 지정된 예외에 대해 재시도 및 스킵 처리를 통해 장애 허용 처리를 수행한다. + * + * @param reader 임베딩 대상 뉴스 기사 데이터를 페이지 단위로 읽어오는 Reader + * @param processor 뉴스 요약을 임베딩 벡터로 변환하는 Processor + * @param writer 임베딩된 결과를 DB에 저장하는 Writer + * @return 슬레이브 Step + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Step articleEmbeddingSlaveStep( + ArticleEmbeddingPageReader reader, + ArticleEmbeddingProcessor processor, + ArticleEmbeddingWriter writer) { + + return new StepBuilder("articleEmbeddingSlaveStep", jobRepository) + .chunk(chunkSize, transactionManager) + .reader(reader) + .processor(processor) + .writer(writer) + .faultTolerant() + .retry(EmbeddingException.class) + .retryLimit(retryLimit) + .skip(EmbeddingException.class) + .skip(JpaConvertorException.class) + .skipLimit(skipLimit) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/EmbeddingException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/EmbeddingException.java new file mode 100644 index 0000000..4aff840 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/EmbeddingException.java @@ -0,0 +1,23 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception; + 
+import com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +public class EmbeddingException extends CustomException { + private final ErrorCode errorCode; + + public EmbeddingException(ErrorCode errorCode) { + super(errorCode); + this.errorCode = errorCode; + } + + public EmbeddingException(ErrorCode errorCode, Throwable cause) { + super(errorCode, cause); + this.errorCode = errorCode; + } + + @Override + public ErrorCode getErrorCode() { + return errorCode; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/error/EmbeddingErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/error/EmbeddingErrorCode.java new file mode 100644 index 0000000..ed8d00e --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/exception/error/EmbeddingErrorCode.java @@ -0,0 +1,57 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.error; + +import org.springframework.http.HttpStatus; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +import lombok.RequiredArgsConstructor; + +/** + * 에러 코드 인터페이스 각 에러 항목에 대한 HTTP 상태, 에러 번호, 메시지를 제공한다. 
+ * A[BB][CCC] + * A (1자리) : 에러 심각도 (1~5) + * 1: 클라이언트 오류 + * 2: 인증 관련 오류 + * 3: 사용자 관련 오류 + * 4: 서버 오류 + * 5: 시스템 오류 + * + * BB (2자리) : 도메인 코드 + * 10: 사용자 관련 (ex: USER_NOT_FOUND) + * 20: 인증 관련 (ex: AUTHORIZATION_FAILED) + * 30: DB 관련 오류 (ex: DB_CONNECTION_FAILED) + * 40: API 관련 오류 (ex: API_TIMEOUT) + * 50: 시스템 오류 (ex: INTERNAL_SERVER_ERROR) + * + * CCC (3자리) : 세부 오류 순번 + * 001: 첫 번째 오류 + * 002: 두 번째 오류 + * 003: 세 번째 오류, 등등 + * + * @since 2025-05-09 + */ +@RequiredArgsConstructor +public enum EmbeddingErrorCode implements ErrorCode { + MODEL_CREATION_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 440001, "임베딩 모델 생성 실패"), + API_CALL_ERROR(HttpStatus.SERVICE_UNAVAILABLE, 440002, "임베딩 API 호출 실패"), + ITEM_NOT_FOUND(HttpStatus.SERVICE_UNAVAILABLE, 450001, "임베딩할 아이템이 없습니다"); + + private final HttpStatus status; + private final int code; + private final String message; + + @Override + public HttpStatus httpStatus() { + return status; + } + + @Override + public int codeNumber() { + return code; + } + + @Override + public String message() { + return message; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/partitioner/ArticleEmbeddingPartitioner.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/partitioner/ArticleEmbeddingPartitioner.java new file mode 100644 index 0000000..88db515 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/partitioner/ArticleEmbeddingPartitioner.java @@ -0,0 +1,97 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.partitioner; + +import java.util.List; +import java.util.Map; + +import org.springframework.batch.core.partition.support.Partitioner; +import org.springframework.batch.item.ExecutionContext; +import org.springframework.stereotype.Component; + +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.EmbeddingException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.error.EmbeddingErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.IdRangePartitionCalculator; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.PartitionMapBuilder; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.dto.ArticleIdRange; + +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * {@link Partitioner} 구현체로, 전체 페이지 수(totalPages)를 기준으로 + * 병렬 처리할 수 있도록 페이지 범위를 gridSize만큼 균등하게 분할한다. + * + *

Spring Batch에서 멀티스레드로 작업을 병렬 처리할 때 사용되며, + * 각 ExecutionContext에는 'startPage'와 'endPage'가 설정된다. + * + * @since 2025-05-17 + */ +@Slf4j +@Component +@RequiredArgsConstructor +public class ArticleEmbeddingPartitioner implements Partitioner { + private static final String QUERY_GET_MIN_ID = """ + SELECT MIN(a.id) FROM ArticleEntity a WHERE a.summary IS NOT NULL AND a.summaryVector IS NULL + """; + private static final String QUERY_GET_MAX_ID = """ + SELECT MAX(a.id) FROM ArticleEntity a WHERE a.summary IS NOT NULL AND a.summaryVector IS NULL + """; + + private final IdRangePartitionCalculator calculator; + + @PersistenceContext + private final EntityManager entityManager; + + /** + * ID 범위를 기준으로 gridSize만큼 파티션을 분할하여 반환한다. + * + * @param gridSize 생성할 파티션 수 + * @return 각 파티션의 ExecutionContext를 담은 맵 + * @author 함예정 + * @since 2025-05-18 + */ + @Override + public Map partition(int gridSize) { + log.info("Partitioning article embedding partitioner with gridSize: {}", gridSize); + Long minId = createQuery(QUERY_GET_MIN_ID); + Long maxId = createQuery(QUERY_GET_MAX_ID); + + throwIfInvalidIdRange(minId, maxId); + List ranges = calculator.calculate(minId, maxId, gridSize); + return PartitionMapBuilder.build(ranges); + } + + /** + * 주어진 JPQL 쿼리를 실행하여 Long 타입 결과를 조회한다. + * + * @param query 실행할 JPQL 쿼리 문자열 + * @return 쿼리 결과 값 + * @author 함예정 + * @since 2025-05-17 + */ + private Long createQuery(String query) { + return entityManager.createQuery( + query, + Long.class + ).getSingleResult(); + } + + /** + * 조회된 ID 범위가 유효하지 않을 경우 예외를 발생시킨다. + * + * minId 또는 maxId가 null이거나, minId가 maxId보다 큰 경우 + * {@link EmbeddingException}을 {@link EmbeddingErrorCode#ITEM_NOT_FOUND}와 함께 발생시킨다. 
+ * + * @param minId ID 범위의 최소값 + * @param maxId ID 범위의 최대값 + * @throws EmbeddingException 유효하지 않은 ID 범위일 경우 + * @author 함예정 + * @since 2025-05-18 + */ + private void throwIfInvalidIdRange(Long minId, Long maxId) { + if (minId == null || maxId == null || minId > maxId) { + throw new EmbeddingException(EmbeddingErrorCode.ITEM_NOT_FOUND); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/processor/ArticleEmbeddingProcessor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/processor/ArticleEmbeddingProcessor.java new file mode 100644 index 0000000..5255875 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/processor/ArticleEmbeddingProcessor.java @@ -0,0 +1,113 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.processor; + +import java.util.List; + +import org.springframework.ai.document.MetadataMode; +import org.springframework.ai.embedding.EmbeddingResponse; +import org.springframework.ai.openai.OpenAiEmbeddingModel; +import org.springframework.ai.openai.OpenAiEmbeddingOptions; +import org.springframework.ai.openai.api.OpenAiApi; +import org.springframework.ai.retry.RetryUtils; +import org.springframework.batch.item.ItemProcessor; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.EmbeddingException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.error.EmbeddingErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +import lombok.extern.slf4j.Slf4j; + +/** + * 뉴스 기사 요약(summary)을 기반으로 임베딩 벡터를 생성하고 + * 해당 벡터를 ArticleEntity에 
설정하는 ItemProcessor 구현체. + * Spring Batch의 처리 단계에서 사용된다. + * + * @since 2025-05-17 + */ +@Component +@Slf4j +public class ArticleEmbeddingProcessor implements ItemProcessor { + private final OpenAiApi openAiApi; + private final String embeddingModelName; + + public ArticleEmbeddingProcessor(OpenAiApi openAiApi, + @Value("${spring.ai.openai.embedding-model}") String embeddingModelName) { + this.openAiApi = openAiApi; + this.embeddingModelName = embeddingModelName; + } + + /** + * ArticleEntity의 summary 필드를 기반으로 임베딩 벡터를 생성하고, + * 해당 벡터를 summaryVector 필드에 설정하여 반환한다. + * + * @param item 임베딩할 summary를 가진 ArticleEntity + * @return summaryVector가 설정된 ArticleEntity + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public ArticleEntity process(ArticleEntity item) { + log.info("뉴스 임베딩: id = {}, guid = {}, Thread = {}", + item.getId(), item.getGuid(), Thread.currentThread().getName()); + String newsContent = item.getSummary(); + float[] vector = getEmbedding(newsContent); + return item.changeSummaryVector(vector); + } + + /** + * 주어진 텍스트에 대해 임베딩 벡터(float 배열)를 생성한다. + * 내부적으로 OpenAI 임베딩 모델을 생성하고 실행한다. + * + * @param text 임베딩할 입력 텍스트 + * @return 텍스트에 대한 임베딩 벡터 + * @author 정안식 + * @since 2025-05-11 + */ + private float[] getEmbedding(String text) { + OpenAiEmbeddingModel model = createModel(); + return executeEmbedding(model, text); + } + + /** + * OpenAI 임베딩 모델 인스턴스를 생성한다. + * + * @return 생성된 OpenAiEmbeddingModel 객체 + * @throws EmbeddingException 모델 생성 중 오류 발생 시 + * @author 정안식 + * @since 2025-05-11 + */ + private OpenAiEmbeddingModel createModel() { + try { + return new OpenAiEmbeddingModel( + openAiApi, + MetadataMode.EMBED, + OpenAiEmbeddingOptions.builder() + .model(embeddingModelName) + .build(), + RetryUtils.DEFAULT_RETRY_TEMPLATE + ); + } catch (Exception e) { + throw new EmbeddingException(EmbeddingErrorCode.MODEL_CREATION_ERROR, e); + } + } + + /** + * 주어진 모델을 사용하여 텍스트 임베딩을 계산한다. 
+ * + * @param model OpenAiEmbeddingModel 인스턴스 + * @param text 입력 텍스트 + * @return 계산된 임베딩 벡터 배열 + * @throws EmbeddingException API 호출 중 오류 발생 시 + * @author 정안식 + * @since 2025-05-11 + */ + private float[] executeEmbedding(OpenAiEmbeddingModel model, String text) { + try { + EmbeddingResponse response = model.embedForResponse(List.of(text)); + return response.getResults().getFirst().getOutput(); + } catch (Exception e) { + throw new EmbeddingException(EmbeddingErrorCode.API_CALL_ERROR, e); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/reader/ArticleEmbeddingPageReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/reader/ArticleEmbeddingPageReader.java new file mode 100644 index 0000000..ea00f95 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/reader/ArticleEmbeddingPageReader.java @@ -0,0 +1,42 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.reader; + +import java.util.HashMap; +import java.util.Map; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.batch.item.database.JpaPagingItemReader; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +import jakarta.persistence.EntityManagerFactory; +import lombok.extern.slf4j.Slf4j; + +@Component +@StepScope +@Slf4j +public class ArticleEmbeddingPageReader extends JpaPagingItemReader { + private static final String JPQL = """ + SELECT a + FROM ArticleEntity a + WHERE a.summaryVector IS NULL + AND a.summary IS NOT NULL + AND a.id BETWEEN :minId AND :maxId + """; + public ArticleEmbeddingPageReader( + EntityManagerFactory entityManagerFactory, + 
@Value("#{stepExecutionContext[minId]}") Long minId, + @Value("#{stepExecutionContext[maxId]}") Long maxId) { + + this.setName("articleEmbeddingReader-" + minId + "-" + maxId); + this.setEntityManagerFactory(entityManagerFactory); + this.setQueryString(JPQL); + Map params = new HashMap<>(); + params.put("minId", minId); + params.put("maxId", maxId); + this.setParameterValues(params); + this.setSaveState(false); + log.info("Initialized reader for ID range {} ~ {}", minId, maxId); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/writer/ArticleEmbeddingWriter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/writer/ArticleEmbeddingWriter.java new file mode 100644 index 0000000..484deca --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/batch/writer/ArticleEmbeddingWriter.java @@ -0,0 +1,21 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.writer; + +import org.springframework.batch.item.Chunk; +import org.springframework.batch.item.ItemWriter; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; + +import lombok.RequiredArgsConstructor; + +@Component +@RequiredArgsConstructor +public class ArticleEmbeddingWriter implements ItemWriter { + private final NewsInfoJpaRepository newsInfoJpaRepository; + + @Override + public void write(Chunk chunk) { + newsInfoJpaRepository.saveAll(chunk); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingJobExecutor.java 
b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingJobExecutor.java new file mode 100644 index 0000000..763ebf8 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingJobExecutor.java @@ -0,0 +1,76 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.quartz.config; + +import org.quartz.DisallowConcurrentExecution; +import org.quartz.JobExecutionContext; +import org.springframework.batch.core.Job; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.launch.JobLauncher; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.BatchJobExceptionTranslator; +import com.likelion.backendplus4.talkpick.batch.news.article.exception.error.ArticleCollectorErrorCode; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * Quartz에 의해 트리거되는 Spring Batch Job 실행 클래스. + * JobLauncher를 통해 {@code rssJob}을 수동 실행하며, 매 실행 시 고유한 JobParameters를 생성하여 중복 실행을 방지한다. + * + * - @DisallowConcurrentExecution: 이전 실행이 끝나기 전에는 새로운 실행이 중첩되지 않도록 제한 + * - JobParameters에 timestamp를 포함시켜 매번 다른 인스턴스로 실행되도록 설정 + * + * 이 클래스는 단순한 실행자 역할만 수행한다. + * + * @since 2025-05-17 + */ +@Component +@Slf4j +@DisallowConcurrentExecution +@RequiredArgsConstructor +public class ArticleEmbeddingJobExecutor implements org.quartz.Job { + private final JobLauncher jobLauncher; + private final Job summaryJob; + private final Job articleEmbeddingJob; + private final BatchJobExceptionTranslator batchJobExceptionTranslator; + + /** + * Quartz 트리거에 의해 호출되는 메서드. + * 내부적으로 Spring Batch Job을 실행하는 로직을 위임한다. 
+ * + * @param jobExecutionContext Quartz 실행 컨텍스트 + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public void execute(JobExecutionContext jobExecutionContext) { + startSpringBatchJob(); + } + + /** + * Spring Batch Job을 JobLauncher를 통해 실행한다. + * 각 실행마다 timestamp 파라미터를 부여하여 중복 실행 방지. + * 예외 발생 시 {@link ArticleCollectorException}으로 변환하여 처리한다. + * + * @author 함예정 + * @since 2025-05-17 + */ + private void startSpringBatchJob() { + JobParameters params = new JobParametersBuilder() + .addLong("timestamp", System.currentTimeMillis()) + .toJobParameters(); + + try { + log.info("Quartz Job 실행 - 요약"); + jobLauncher.run(summaryJob, params); + + log.info("Quartz Job 실행 - Embedding"); + jobLauncher.run(articleEmbeddingJob, params); + } catch (Exception e) { + ArticleCollectorErrorCode exceptionCode = batchJobExceptionTranslator.translate(e); + throw new ArticleCollectorException(exceptionCode); + } + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzJobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzJobConfig.java new file mode 100644 index 0000000..f380509 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzJobConfig.java @@ -0,0 +1,40 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.quartz.config; + +import org.quartz.JobBuilder; +import org.quartz.JobDetail; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +/** + * 수집된 뉴스를 요약하고 임베딩을 계산하는 스케줄 작업 + * application.yml의 spring.quartz.article-embedding.cron 속성으로 실행 주기 설정 + * @since 2025-05-17 + */ +@Slf4j +@Configuration +public class ArticleEmbeddingQuartzJobConfig { + 
@Getter + private static final String ARTICLE_EMBEDDING_JOB_DETAIL_NAME = "articleEmbeddingJobDetail"; + + /** + * RSS 수집 Quartz JobDetail 빈 등록. + * Job 클래스는 {@link ArticleEmbeddingJobExecutor}이며 다음과 같은 설정을 포함한다: + * - withIdentity("rssBatchJob"): Scheduler 내에서 이 Job을 고유하게 식별하기 위한 이름 지정 + * - storeDurably(): Trigger가 없더라도 Scheduler에 등록된 상태로 유지되도록 설정 + * + * @return RSS 배치 작업용 JobDetail 객체 + * @author 함예정 + * @since 2025-05-17 + */ + @Bean(ARTICLE_EMBEDDING_JOB_DETAIL_NAME) + public JobDetail articleEmbeddingJobDetail() { + return JobBuilder.newJob(ArticleEmbeddingJobExecutor.class) + .withIdentity(ARTICLE_EMBEDDING_JOB_DETAIL_NAME) + .storeDurably() + .build(); + } + +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzTriggerConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzTriggerConfig.java new file mode 100644 index 0000000..1a1e0f9 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/embedding/quartz/config/ArticleEmbeddingQuartzTriggerConfig.java @@ -0,0 +1,57 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.quartz.config; + +import org.quartz.CronScheduleBuilder; +import org.quartz.JobDetail; +import org.quartz.Trigger; +import org.quartz.TriggerBuilder; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import lombok.extern.slf4j.Slf4j; + +@Configuration +@Slf4j +public class ArticleEmbeddingQuartzTriggerConfig { + private final String cronExpression; + private final JobDetail articleEmbeddingJobDetail; + private final String articleEmbeddingJobDetailName = + 
ArticleEmbeddingQuartzJobConfig.getARTICLE_EMBEDDING_JOB_DETAIL_NAME(); + + /** + * 생성자 주입을 통해 Cron 표현식을 설정한다. + * + * @param cronExpression RSS 배치 실행 주기를 정의하는 Cron 표현식 + * application.yml에서 spring.quartz.article-embedding.cron 값을 로드 합니다. + * @author 함예정 + * @since 2025-05-17 + */ + public ArticleEmbeddingQuartzTriggerConfig( + @Value("${spring.quartz.article-embedding.cron}") String cronExpression, + @Qualifier("articleEmbeddingJobDetail") + JobDetail articleEmbeddingJobDetail) { + this.cronExpression = cronExpression; + this.articleEmbeddingJobDetail = articleEmbeddingJobDetail; + } + + /** + * 내용 임베딩 작업을 위한 Quartz Trigger 빈 등록. + * - forJob: 이 Trigger 가 어떤 Quartz Job 과 연관되어 실행될지를 지정 + * - withIdentity: Scheduler 내에서 이 Trigger 를 고유하게 식별하기 위한 이름 지정 + * - withSchedule: Cron 표현식을 사용하여 실행 주기 설정 + * + * @return RSS 배치 작업용 Trigger 객체 + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Trigger articleEmbeddingQuartzTrigger() { + log.info("Quartz Trigger: " + articleEmbeddingJobDetailName); + return TriggerBuilder.newTrigger() + .forJob(articleEmbeddingJobDetail) + .withIdentity(articleEmbeddingJobDetailName + "trigger") + .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression)) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java new file mode 100644 index 0000000..b3b3a03 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/adapter/NewsInfoProviderAdapter.java @@ -0,0 +1,49 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.adapter; + +import java.util.List; + +import org.springframework.context.annotation.Primary; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; +import 
org.springframework.data.domain.Sort; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.index.application.port.out.NewsInfoProviderPort; +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.mapper.ArticleEntityMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; + +import lombok.RequiredArgsConstructor; + +/** + * TODO: 이벤트 기반으로 색인 안된 뉴스만 제공하도록 수정 필요 + * NewsInfoProviderPort 인터페이스의 구현체로, + * JPA 리포지토리를 통해 뉴스 정보를 조회하는 어댑터 클래스입니다. + * 현재는 최근 100개 뉴스를 반환합니다. + * @since 2025-05-14 + */ +@Component +@RequiredArgsConstructor +public class NewsInfoProviderAdapter implements NewsInfoProviderPort { + private static final int MAX_NEWS_COUNT = 100; + private final NewsInfoJpaRepository newsInfoJpaRepository; + + /** + * 뉴스 정보를 최신순으로 최대 100건까지 조회하여 도메인 객체 리스트로 반환합니다. + * + * @return 뉴스 도메인 객체 리스트 + * @author 함예정 + * @since 2025-05-14 + */ + @Override + public List fetchAll() { + Pageable pageable = PageRequest.of(0, MAX_NEWS_COUNT) + .withSort(Sort.by("pubDate").descending()); + + return newsInfoJpaRepository.findAll(pageable) + .getContent() + .stream() + .map(ArticleEntityMapper::toDomainFromEntity) + .toList(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/FloatArrayToJsonConverter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/FloatArrayToJsonConverter.java new file mode 100644 index 0000000..a3d7b58 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/FloatArrayToJsonConverter.java @@ -0,0 +1,100 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter; + +import com.fasterxml.jackson.core.JsonProcessingException; +import 
com.fasterxml.jackson.databind.ObjectMapper; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.exception.JpaConvertorException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.exception.error.JpaConvertorErrorCode; + +import jakarta.persistence.AttributeConverter; +import jakarta.persistence.Converter; + +/** + * {@link AttributeConverter} 구현체로, float 배열(float[])을 JSON 문자열로 직렬화하거나 + * JSON 문자열을 float 배열로 역직렬화하여 MySQL JSON 타입 컬럼과 매핑한다. + * + *

직렬화/역직렬화 과정에서 오류가 발생하면 {@link JpaConvertorException}을 발생시키며, + * 오류 유형은 {@link JpaConvertorErrorCode#JSON_CONVERT_ERROR}로 정의된다. + * + *

MySQL에는 배열 타입이 없으므로, 배열 데이터를 JSON 형태로 저장하고 읽어오는 데 유용하다. + * + * @since 2025-05-17 + */ +@Converter +public class FloatArrayToJsonConverter implements AttributeConverter { + + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** + * float 배열을 JSON 문자열로 변환하여 DB에 저장한다. + * + * @param attribute float 배열 + * @return JSON 문자열 + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public String convertToDatabaseColumn(float[] attribute) { + if (attribute == null || attribute.length == 0) { + return null; + } + return toStringFromFloatArray(attribute); + } + + /** + * DB에서 조회된 JSON 문자열을 float 배열로 변환하여 엔티티에 주입한다. + * + * @param dbData DB에서 조회된 JSON 문자열 + * @return float 배열 + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public float[] convertToEntityAttribute(String dbData) { + if (isNullDbData(dbData)) { + return new float[0]; + } + return toFloatArrayFromString(dbData); + } + + /** + * dbData가 null 또는 빈 문자열인지 확인한다. + * + * @return boolean + * @author 함예정 + * @since 2025-05-17 + */ + private boolean isNullDbData(String dbData) { + return dbData == null || dbData.isEmpty(); + } + + /** + * float 배열을 JSON 문자열로 직렬화한다. + * + * @return Json 문자열 + * @author 함예정 + * @since 2025-05-17 + */ + private String toStringFromFloatArray(float[] attribute) { + try { + return objectMapper.writeValueAsString(attribute); + } catch (JsonProcessingException e) { + throw new JpaConvertorException(JpaConvertorErrorCode.JSON_CONVERT_ERROR, e); + } + } + + /** + * JSON 문자열을 float 배열로 역직렬화한다. 
+ * + * @return float 배열 + * @author 함예정 + * @since 2025-05-17 + */ + private float[] toFloatArrayFromString(String dbData) { + try { + return objectMapper.readValue(dbData, float[].class); + } catch (JsonProcessingException e) { + throw new JpaConvertorException(JpaConvertorErrorCode.JSON_CONVERT_ERROR, e); + } + } + +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/JpaConvertorException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/JpaConvertorException.java new file mode 100644 index 0000000..098f1ae --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/JpaConvertorException.java @@ -0,0 +1,31 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.exception; + +import com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +/** + * JPA AttributeConverter에서 변환 중 발생한 예외를 나타내는 커스텀 예외 클래스. + * + *

일반적으로 JSON 직렬화/역직렬화 중 오류가 발생했을 때 사용되며, + * {@link CustomException}을 상속하고 {@link ErrorCode}를 통해 상세 오류 정보를 제공한다. + * + * @since 2025-05-17 + */ +public class JpaConvertorException extends CustomException { + private final ErrorCode errorCode; + + public JpaConvertorException(ErrorCode errorCode) { + super(errorCode); + this.errorCode = errorCode; + } + + public JpaConvertorException(ErrorCode errorCode, Throwable cause) { + super(errorCode, cause); + this.errorCode = errorCode; + } + + @Override + public ErrorCode getErrorCode() { + return errorCode; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/error/JpaConvertorErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/error/JpaConvertorErrorCode.java new file mode 100644 index 0000000..25b9bb8 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/converter/exception/error/JpaConvertorErrorCode.java @@ -0,0 +1,55 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.exception.error; + +import org.springframework.http.HttpStatus; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +import lombok.RequiredArgsConstructor; + +/** + * 에러 코드 인터페이스 각 에러 항목에 대한 HTTP 상태, 에러 번호, 메시지를 제공한다. 
+ * A[BB][CCC] + * A (1자리) : 에러 심각도 (1~5) + * 1: 클라이언트 오류 + * 2: 인증 관련 오류 + * 3: 사용자 관련 오류 + * 4: 서버 오류 + * 5: 시스템 오류 + * + * BB (2자리) : 도메인 코드 + * 10: 사용자 관련 (ex: USER_NOT_FOUND) + * 20: 인증 관련 (ex: AUTHORIZATION_FAILED) + * 30: DB 관련 오류 (ex: DB_CONNECTION_FAILED) + * 40: API 관련 오류 (ex: API_TIMEOUT) + * 50: 시스템 오류 (ex: INTERNAL_SERVER_ERROR) + * + * CCC (3자리) : 세부 오류 순번 + * 001: 첫 번째 오류 + * 002: 두 번째 오류 + * 003: 세 번째 오류, 등등 + * + * @since 2025-05-17 + */ +@RequiredArgsConstructor +public enum JpaConvertorErrorCode implements ErrorCode { + JSON_CONVERT_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, 440003, "JSON 컨버터 타입 변환 실패"); + + private final HttpStatus status; + private final int code; + private final String message; + + @Override + public HttpStatus httpStatus() { + return status; + } + + @Override + public int codeNumber() { + return code; + } + + @Override + public String message() { + return message; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java new file mode 100644 index 0000000..b3d1c42 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/entity/ArticleEntity.java @@ -0,0 +1,111 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity; + +import java.time.LocalDateTime; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.converter.FloatArrayToJsonConverter; + +import jakarta.persistence.*; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.PastOrPresent; +import jakarta.validation.constraints.Pattern; +import jakarta.validation.constraints.Size; + +import org.hibernate.validator.constraints.URL; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import 
lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import lombok.ToString; + +/** + * RSS 피드를 수집 객체 + * + * @author 양병학 + * @since 2025-05-10 최초 작성 + * @modify 2025-05-10 17:47 PR 수정 + * @ToString exclude로 대량의 텍스트필드 로그에서 제외 + * @Data -> @Getter후 Setter는 개별 지정해서 식별자 보호 + * @EqualsAndHashCode 지정으로 갹채 비교 최적화 + */ +@Entity +@Table(name = "article", uniqueConstraints = @UniqueConstraint(columnNames = {"link"})) +@Getter +@NoArgsConstructor +@AllArgsConstructor +@Builder +@ToString(exclude = "description") +@EqualsAndHashCode(of = "id") +public class ArticleEntity { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Setter + @Column(nullable = false) + @NotBlank(message = "제목은 필수 값입니다") + @Size(max = 500, message = "제목은 최대 500자까지 허용됩니다") + private String title; + + @Column(nullable = false, unique = true) + @NotBlank(message = "링크는 필수 값입니다") + @URL(message = "유효한 URL 형식이어야 합니다") + @Size(max = 255, message = "링크는 최대 255자까지 허용됩니다") + private String link; + + @Setter + @Column(name = "pub_date") + @NotNull(message = "발행일은 필수 값입니다") + @PastOrPresent(message = "발행일은 현재 또는 과거 날짜여야 합니다") + private LocalDateTime pubDate; + + @Column + @NotBlank(message = "카테고리는 필수 값입니다") + @Size(max =10, message = "카테고리는 최대 10자까지 허용됩니다") + private String category; + + @Column + @NotBlank(message = "GUID는 필수 값입니다") + @Size(max = 255, message = "GUID는 최대 255자까지 허용됩니다") + @Pattern(regexp = "^[A-Z]{2}\\d+$", message = "GUID는 2개의 대문자와 숫자로 구성되어야 합니다") // 예: KM12345 + private String guid; + + @Setter + @Column(columnDefinition = "TEXT") + private String description; + + @Setter + @Column(name = "image_url") + @Size(max = 1000, message = "이미지 URL은 최대 1000자까지 허용됩니다") + private String imageUrl; + + @Column(name = "created_at") + private LocalDateTime createdAt; + + @Setter + @Column(name = "summary", columnDefinition = "TEXT") + @Size(max = 1000, message = "요약은 최대 1000자까지 허용됩니다") + private String summary; + + 
@Convert(converter = FloatArrayToJsonConverter.class) + @Column(name = "summary_vector", columnDefinition = "JSON") + private float[] summaryVector; + + public ArticleEntity changeSummaryVector(float[] vector) { + summaryVector = vector; + return this; + } + + @PrePersist + protected void onCreate() { + createdAt = LocalDateTime.now(); + } + + public String getDescription() { + return description != null ? description : ""; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java new file mode 100644 index 0000000..a6b7193 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/mapper/ArticleEntityMapper.java @@ -0,0 +1,19 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.mapper; + +import com.likelion.backendplus4.talkpick.batch.index.domain.model.NewsInfo; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +public class ArticleEntityMapper { + public static NewsInfo toDomainFromEntity(ArticleEntity articleEntity) { + return NewsInfo.builder() + .newsId(articleEntity.getGuid()) + .title(articleEntity.getTitle()) + .content(articleEntity.getDescription()) + .publishedAt(articleEntity.getPubDate()) + .imageUrl(articleEntity.getImageUrl()) + .category(articleEntity.getCategory()) + .summary(articleEntity.getSummary()) + .summaryVector(articleEntity.getSummaryVector()) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java new file mode 100644 index 0000000..20bbe9e --- /dev/null 
+++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/jpa/repository/NewsInfoJpaRepository.java @@ -0,0 +1,25 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository; + +import java.time.LocalDateTime; + +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +@Repository +public interface NewsInfoJpaRepository extends JpaRepository { + + boolean existsByLink(String link); + + /** + * 특정 언론사의 가장 최신 기사 발행일 조회 + * + * @param guidPrefix 언론사 GUID 접두어 (예: "KM", "DA", "KH") + * @return 가장 최신 발행일 + */ + @Query("SELECT MAX(a.pubDate) FROM ArticleEntity a WHERE a.guid LIKE CONCAT(:guidPrefix, '%')") + LocalDateTime findLatestPubDateByGuidPrefix(@Param("guidPrefix") String guidPrefix); +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryJobConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryJobConfig.java new file mode 100644 index 0000000..e1ffc58 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryJobConfig.java @@ -0,0 +1,47 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.config; + +import org.springframework.batch.core.Job; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.job.builder.JobBuilder; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * Spring Batch 
Job 설정 클래스.

+ * 수집된 기사를 OpenAI를 활용해 뉴스 내용을 요약하는 + * Batch Job 을 정의하며, 파티셔닝된 Step 을 시작 단계로 구성한다.

+ * + * 이 잡은 {@code summaryJob}이라는 이름으로 정의되며, + * {@link Step} 객체는 외부에서 주입받아 사용한다.

+ * + * 해당 Job 은 Quartz 또는 Spring Scheduler 를 통해 주기적으로 실행될 수 있다.

+ * + * @since 2025-05-17 + */ +@Configuration +public class SummaryJobConfig { + private final String jobName = "summaryJob"; + private final JobRepository jobRepository; + private final Step articleSummaryStep; + + public SummaryJobConfig(JobRepository jobRepository, Step articleSummaryStep) { + this.jobRepository = jobRepository; + this.articleSummaryStep = articleSummaryStep; + } + + /** + * 뉴스 내용을 요약하는 Spring Batch Job Bean을 생성한다. + * 파티셔닝 Step 을 실행하도록 구성한다. + * + * @return 뉴스 요약 Batch Job + * @author 함예정 + * @since 2025-05-10 + */ + @Bean + public Job summaryJob() { + return new JobBuilder(jobName, jobRepository) + .start(articleSummaryStep) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryStepConfig.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryStepConfig.java new file mode 100644 index 0000000..981e0af --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/config/SummaryStepConfig.java @@ -0,0 +1,98 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.config; + +import org.springframework.batch.core.Step; +import org.springframework.batch.core.partition.support.Partitioner; +import org.springframework.batch.core.repository.JobRepository; +import org.springframework.batch.core.step.builder.StepBuilder; +import org.springframework.batch.item.ItemWriter; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.task.TaskExecutor; +import org.springframework.transaction.PlatformTransactionManager; + +import com.likelion.backendplus4.talkpick.batch.news.article.exception.ArticleCollectorException; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.partitioner.ArticleSummaryPartitioner; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.processor.ArticleSummaryProcessor; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.reader.ArticleSummaryPageReader; + +/** + * 배치 작업에서 기사 요약 처리를 위한 파티셔닝 및 슬레이브 Step을 설정하는 구성 클래스. + * + * @since 2025-05-17 + */ +@Configuration +public class SummaryStepConfig { + private static final String partitionedStepName = "articleSummaryStep"; + private final String executorName = "normalExecutor"; + private final String summaryStepName = "articleSummarySlaveStep"; + private final int gridSize = 5; + private final int chunkSize = 100; + private final int retryLimit = 3; + private final int skipLimit = 100; + private final JobRepository jobRepository; + private final Partitioner partitioner; + private final PlatformTransactionManager transactionManager; + private final TaskExecutor taskExecutor; + private final ArticleSummaryProcessor summaryProcessor; + private final ItemWriter writer; + + public SummaryStepConfig(JobRepository jobRepository, + ArticleSummaryPartitioner partitioner, + PlatformTransactionManager platformTransactionManager, + @Qualifier(executorName) + TaskExecutor taskExecutor, + ArticleSummaryProcessor summaryProcessor, + ItemWriter articleSummaryWriter) { + this.jobRepository = jobRepository; + this.partitioner = partitioner; + this.transactionManager = platformTransactionManager; + this.taskExecutor = taskExecutor; + this.summaryProcessor = summaryProcessor; + this.writer = articleSummaryWriter; + } + + /** + * 파티셔닝된 마스터 Step을 정의한다. + * 각 파티션은 {@code articleSummarySlaveStep}을 실행하며, 병렬 처리를 위해 TaskExecutor가 사용된다. 
+ * + * @param articleSummarySlaveStep 파티션마다 실행될 슬레이브 Step + * @return 마스터 Step Bean + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Step articleSummaryStep(Step articleSummarySlaveStep) { + return new StepBuilder(partitionedStepName, jobRepository) + .partitioner(partitionedStepName, partitioner) + .step(articleSummarySlaveStep) + .taskExecutor(taskExecutor) + .gridSize(gridSize) + .build(); + } + + /** + * 기사 데이터를 요약 처리하는 슬레이브 Step을 정의한다. + * 청크 기반으로 데이터를 읽고, 처리하고, 쓰며, 오류에 대해 재시도 및 건너뛰기를 허용한다. + * + * @param reader 기사 데이터를 읽는 Reader + * @return 슬레이브 Step Bean + * @author 함예정 + * @since 2025-05-17 + */ + @Bean + public Step articleSummarySlaveStep(ArticleSummaryPageReader reader) { + return new StepBuilder(summaryStepName, jobRepository) + .chunk(chunkSize, transactionManager) + .reader(reader) + .processor(summaryProcessor) + .writer(writer) + .faultTolerant() + .retry(ArticleCollectorException.class) + .retryLimit(retryLimit) + .skip(ArticleCollectorException.class) + .skipLimit(skipLimit) + .build(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/ArticleSummaryException.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/ArticleSummaryException.java new file mode 100644 index 0000000..bfb7389 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/ArticleSummaryException.java @@ -0,0 +1,23 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.exception; + +import com.likelion.backendplus4.talkpick.batch.common.exception.CustomException; +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +public class ArticleSummaryException extends CustomException { + private final ErrorCode errorCode; + + public ArticleSummaryException(ErrorCode errorCode) { + super(errorCode); + 
this.errorCode = errorCode; + } + + public ArticleSummaryException(ErrorCode errorCode, Throwable cause) { + super(errorCode, cause); + this.errorCode = errorCode; + } + + @Override + public ErrorCode getErrorCode() { + return errorCode; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/error/ArticleSummaryErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/error/ArticleSummaryErrorCode.java new file mode 100644 index 0000000..23456a4 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/exception/error/ArticleSummaryErrorCode.java @@ -0,0 +1,55 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.exception.error; + +import org.springframework.http.HttpStatus; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; + +import lombok.RequiredArgsConstructor; + +/** + * 에러 코드 인터페이스 각 에러 항목에 대한 HTTP 상태, 에러 번호, 메시지를 제공한다. 
+ * A[BB][CCC] + * A (1자리) : 에러 심각도 (1~5) + * 1: 클라이언트 오류 + * 2: 인증 관련 오류 + * 3: 사용자 관련 오류 + * 4: 서버 오류 + * 5: 시스템 오류 + * + * BB (2자리) : 도메인 코드 + * 10: 사용자 관련 (ex: USER_NOT_FOUND) + * 20: 인증 관련 (ex: AUTHORIZATION_FAILED) + * 30: DB 관련 오류 (ex: DB_CONNECTION_FAILED) + * 40: API 관련 오류 (ex: API_TIMEOUT) + * 50: 시스템 오류 (ex: INTERNAL_SERVER_ERROR) + * + * CCC (3자리) : 세부 오류 순번 + * 001: 첫 번째 오류 + * 002: 두 번째 오류 + * 003: 세 번째 오류, 등등 + * + * @since 2025-05-18 + */ +@RequiredArgsConstructor +public enum ArticleSummaryErrorCode implements ErrorCode { + ITEM_NOT_FOUND(HttpStatus.SERVICE_UNAVAILABLE, 450001, "요약할 뉴스가 없습니다"); + + private final HttpStatus status; + private final int code; + private final String message; + + @Override + public HttpStatus httpStatus() { + return status; + } + + @Override + public int codeNumber() { + return code; + } + + @Override + public String message() { + return message; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/partitioner/ArticleSummaryPartitioner.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/partitioner/ArticleSummaryPartitioner.java new file mode 100644 index 0000000..bc13fd6 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/partitioner/ArticleSummaryPartitioner.java @@ -0,0 +1,111 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.partitioner; + +import java.util.List; +import java.util.Map; + +import org.springframework.batch.core.partition.support.Partitioner; +import org.springframework.batch.item.ExecutionContext; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.EmbeddingException; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.embedding.batch.exception.error.EmbeddingErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.exception.ArticleSummaryException; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.exception.error.ArticleSummaryErrorCode; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.IdRangePartitionCalculator; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.PartitionMapBuilder; +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.dto.ArticleIdRange; + +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * {@link Partitioner} 구현체로, 전체 페이지 수(totalPages)를 기준으로 + * 병렬 처리할 수 있도록 페이지 범위를 gridSize만큼 균등하게 분할한다. + * + *

Spring Batch에서 멀티스레드로 작업을 병렬 처리할 때 사용되며, + * 각 ExecutionContext에는 'startPage'와 'endPage'가 설정된다. + * + * @since 2025-05-17 + */ +@Component +@Slf4j +@RequiredArgsConstructor +public class ArticleSummaryPartitioner implements Partitioner { + private static final String QUERY_GET_MIN_ID = """ + SELECT MIN(a.id) FROM ArticleEntity a WHERE a.summary IS NULL + """; + private static final String QUERY_GET_MAX_ID = """ + SELECT MAX(a.id) FROM ArticleEntity a WHERE a.summary IS NULL + """; + + private final IdRangePartitionCalculator calculator; + @PersistenceContext + private EntityManager entityManager; + + /** + * ID 범위를 기준으로 데이터를 분할한다. + * Spring Batch에서 마스터 Step이 병렬로 슬레이브 Step을 실행할 수 있도록 파티션을 생성한다. + * + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public Map partition(int gridSize) { + Long minId = createQuery(QUERY_GET_MIN_ID); + Long maxId = createQuery(QUERY_GET_MAX_ID); + + throwIfInvalidIdRange(minId, maxId); + List ranges = calculator.calculate(minId, maxId, gridSize); + return PartitionMapBuilder.build(ranges); + } + + /** + * 주어진 ID 범위가 유효하지 않을 경우 예외를 발생시킨다. + * + * minId 또는 maxId가 null이거나, minId가 maxId보다 큰 경우 + * {@link ArticleSummaryException}을 {@link ArticleSummaryErrorCode#ITEM_NOT_FOUND}와 함께 발생시킨다. + * + * @param minId ID 범위의 최소값 + * @param maxId ID 범위의 최대값 + * @throws ArticleSummaryException 유효하지 않은 ID 범위일 경우 + * + * @author 함예정 + * @since 2025-05-18 + */ + private void throwIfInvalidIdRange(Long minId, Long maxId) { + if (minId == null || maxId == null || minId > maxId) { + throw new ArticleSummaryException(ArticleSummaryErrorCode.ITEM_NOT_FOUND); + } + } + + /** + * 주어진 JPQL 쿼리를 실행하여 단일 Long 값을 반환한다. + * + * @param query 실행할 JPQL 쿼리 문자열 + * @return 조회된 Long 값 + * @author 함예정 + * @since 2025-05-17 + */ + private Long createQuery(String query) { + return entityManager.createQuery( + query, + Long.class + ).getSingleResult(); + } + + /** + * ID 범위가 유효한지 확인한다. 
+ * + * @param minId 조회된 최소 ID + * @param maxId 조회된 최대 ID + * @return 범위가 유효하지 않으면 true 반환 + * @author 함예정 + * @since 2025-05-17 + */ + private boolean isInvalidIdRange(Long minId, Long maxId) { + return minId == null || maxId == null || minId > maxId; + } + +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/processor/ArticleSummaryProcessor.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/processor/ArticleSummaryProcessor.java new file mode 100644 index 0000000..0ddd1e8 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/processor/ArticleSummaryProcessor.java @@ -0,0 +1,53 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.processor; + +import org.springframework.ai.chat.client.ChatClient; +import org.springframework.batch.item.ItemProcessor; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * 뉴스 기사를 AI 모델을 통해 요약 처리하는 ItemProcessor 구현체. + * 입력으로 받은 ArticleEntity의 본문을 요약하여 summary 필드에 설정한 후 반환한다. + * + * @since 2025-05-17 + */ +@Component +@RequiredArgsConstructor +@Slf4j +public class ArticleSummaryProcessor implements ItemProcessor { + private final String prompt = "Summarize the following news in 3–5 concise sentences, objectively, in Korean.\n\n news: \n"; + private final ChatClient chatClient; + + /** + * 기사 내용을 AI를 통해 요약하고, 요약 결과를 ArticleEntity에 설정하여 반환한다. 
+ * + * @param item 요약할 뉴스 기사 엔티티 + * @return 요약이 포함된 뉴스 기사 엔티티 + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public ArticleEntity process(ArticleEntity item) { + log.info("뉴스 요약: id = {}, guid = {}", item.getId(), item.getGuid()); + String newsContent = item.getDescription(); + String summary = getSummary(newsContent); + item.setSummary(summary); + return item; + } + + /** + * 주어진 뉴스 기사 본문을 AI 모델을 통해 요약한다. + * + * @param text 뉴스 기사 본문 + * @return 요약된 텍스트 + * @author 함예정 + * @since 2025-05-17 + */ + private String getSummary(String text) { + return chatClient.prompt().user(prompt + text).call().content(); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/reader/ArticleSummaryPageReader.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/reader/ArticleSummaryPageReader.java new file mode 100644 index 0000000..57c9cb3 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/reader/ArticleSummaryPageReader.java @@ -0,0 +1,57 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.reader; + +import java.util.HashMap; +import java.util.Map; + +import org.springframework.batch.core.configuration.annotation.StepScope; +import org.springframework.batch.item.database.JpaPagingItemReader; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; + +import jakarta.persistence.EntityManagerFactory; +import lombok.extern.slf4j.Slf4j; + +/** + * 요약되지 않은 뉴스 기사 데이터를 ID 범위 기반으로 페이징 조회하는 JPA ItemReader. + * 파티셔닝된 슬레이브 Step에서 각 파티션이 담당할 ID 구간의 데이터를 읽기 위해 사용된다. 
+ * + * @since 2025-05-17 + */ +@Component +@Slf4j +@StepScope +public class ArticleSummaryPageReader extends JpaPagingItemReader { + private static final String JPQL = """ + SELECT a + FROM ArticleEntity a + WHERE a.summary IS NULL + AND a.id BETWEEN :minId AND :maxId + """; + + /** + * 지정된 ID 범위에 해당하는 기사 데이터를 페이징 방식으로 읽어오는 Reader를 초기화한다. + * + * @param entityManagerFactory JPA EntityManagerFactory + * @param minId 파티션에서 처리할 최소 ID (StepExecutionContext에서 주입됨) + * @param maxId 파티션에서 처리할 최대 ID (StepExecutionContext에서 주입됨) + * @author 함예정 + * @since 2025-05-17 + */ + public ArticleSummaryPageReader( + EntityManagerFactory entityManagerFactory, + @Value("#{stepExecutionContext[minId]}") Long minId, + @Value("#{stepExecutionContext[maxId]}") Long maxId) { + + this.setName("articleSummaryReader-" + minId + "-" + maxId); + this.setEntityManagerFactory(entityManagerFactory); + this.setQueryString(JPQL); + Map params = new HashMap<>(); + params.put("minId", minId); + params.put("maxId", maxId); + this.setParameterValues(params); + this.setPageSize(100); + this.setSaveState(false); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/writer/ArticleSummaryWriter.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/writer/ArticleSummaryWriter.java new file mode 100644 index 0000000..61851b3 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/summary/batch/writer/ArticleSummaryWriter.java @@ -0,0 +1,34 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.summary.batch.writer; + +import org.springframework.batch.item.Chunk; +import org.springframework.batch.item.ItemWriter; +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.entity.ArticleEntity; +import 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.jpa.repository.NewsInfoJpaRepository; + +import lombok.RequiredArgsConstructor; + +/** + * 처리된 뉴스 기사 요약 데이터를 DB에 저장하는 ItemWriter 구현체. + * + * @since 2025-05-17 + */ +@Component +@RequiredArgsConstructor +public class ArticleSummaryWriter implements ItemWriter { + private final NewsInfoJpaRepository newsInfoJpaRepository; + + /** + * 청크 단위로 받은 기사 요약 데이터를 데이터베이스에 일괄 저장한다. + * + * @param chunk 요약이 완료된 기사 데이터 목록 + * @throws Exception 저장 중 발생할 수 있는 예외 + * @author 함예정 + * @since 2025-05-17 + */ + @Override + public void write(Chunk chunk) throws Exception { + newsInfoJpaRepository.saveAll(chunk); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/IdRangePartitionCalculator.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/IdRangePartitionCalculator.java new file mode 100644 index 0000000..4063697 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/IdRangePartitionCalculator.java @@ -0,0 +1,37 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner; + +import java.util.ArrayList; +import java.util.List; + +import org.springframework.stereotype.Component; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.dto.ArticleIdRange; + +@Component +public class IdRangePartitionCalculator { + + /** + * @param minId 최소 ID + * @param maxId 최대 ID + * @param gridSize 분할 개수 + * @return 각 파티션의 IdRange 리스트 + */ + public List calculate(long minId, long maxId, int gridSize) { + long total = maxId - minId + 1; + long baseSize = total / gridSize; + long remainder = total % gridSize; + + List ranges = new ArrayList<>(gridSize); + long start = minId; + + for (int i = 0; i < gridSize; i++) { + long size = baseSize + (i < remainder ? 
1 : 0); + long end = (i == gridSize - 1) ? maxId : (start + size - 1); + + ranges.add(new ArticleIdRange(start, end)); + start = end + 1; + } + + return ranges; + } +} \ No newline at end of file diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/PartitionMapBuilder.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/PartitionMapBuilder.java new file mode 100644 index 0000000..3639cbb --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/PartitionMapBuilder.java @@ -0,0 +1,23 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.springframework.batch.item.ExecutionContext; + +import com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.dto.ArticleIdRange; + +public class PartitionMapBuilder { + public static Map build(List ranges) { + Map partitions = new LinkedHashMap<>(); + for (int i = 0; i < ranges.size(); i++) { + ArticleIdRange r = ranges.get(i); + ExecutionContext ctx = new ExecutionContext(); + ctx.putLong("minId", r.start()); + ctx.putLong("maxId", r.end()); + partitions.put("partition" + i, ctx); + } + return partitions; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/dto/ArticleIdRange.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/dto/ArticleIdRange.java new file mode 100644 index 0000000..ddc66b2 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/infrastructure/support/partitioner/dto/ArticleIdRange.java @@ -0,0 +1,8 @@ +package 
com.likelion.backendplus4.talkpick.batch.news.article.infrastructure.support.partitioner.dto; + +/** + * ID 범위(start, end)를 표현하는 불변 타입 DTO + * + * @since 2025-05-18 + */ +public record ArticleIdRange(long start, long end) {} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/presentation/controller/ArticleCollectorController.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/presentation/controller/ArticleCollectorController.java new file mode 100644 index 0000000..59c369c --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/news/article/presentation/controller/ArticleCollectorController.java @@ -0,0 +1,51 @@ +package com.likelion.backendplus4.talkpick.batch.news.article.presentation.controller; + +import static com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse.*; + +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.DeleteMapping; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import com.likelion.backendplus4.talkpick.batch.common.response.ApiResponse; +import com.likelion.backendplus4.talkpick.batch.news.article.application.port.in.ArticleCollectorUseCase; +import com.likelion.backendplus4.talkpick.batch.news.article.application.service.dto.ArticleCollectorStatusResponse; + +import lombok.RequiredArgsConstructor; + +/** + * 뉴스 수집 스케줄러 컨트롤러. + * 수집기 실행 및 정지를 위한 API 엔드포인트를 제공한다. + * 내부적으로 {@link ArticleCollectorUseCase}를 호출하여 작업을 위임한다. + */ +@RestController +@RequiredArgsConstructor +@RequestMapping("/news/collector") +public class ArticleCollectorController { + private final ArticleCollectorUseCase articleCollectorUsecase; + + /** + * 뉴스 RSS 수집을 시작한다. 
+ * + * @return 수집기 상태 응답 (실행 여부 및 메시지 포함) + * @since 2025-05-10 + * @author 함예정 + */ + @PostMapping("/start") + public ResponseEntity> start() { + return success(articleCollectorUsecase.start()); + } + + /** + * 뉴스 RSS 수집을 정지한다. + * + * @return 수집기 상태 응답 (정지 여부 및 메시지 포함) + * @since 2025-05-10 + * @author 함예정 + */ + @DeleteMapping("/stop") + public ResponseEntity> stop() { + return success(articleCollectorUsecase.stop()); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/ExceptionSampleService.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/ExceptionSampleService.java new file mode 100644 index 0000000..ff9a7d9 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/application/ExceptionSampleService.java @@ -0,0 +1,39 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.exception.application; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.likelion.backendplus4.talkpick.batch.sample.common.exception.application.in.ExceptionSampleUseCase; +import com.likelion.backendplus4.talkpick.batch.sample.common.exception.exception.SampleException; +import com.likelion.backendplus4.talkpick.batch.sample.common.exception.exception.error.ExceptionSampleErrorCode; +import org.springframework.stereotype.Service; + +@Service +public class ExceptionSampleService implements ExceptionSampleUseCase { + + /** + * 예외 처리 예시 로직입니다. + */ + @Override + public String failCase1() { + boolean somethingWrong = true; + if (somethingWrong) { + throw new SampleException(ExceptionSampleErrorCode.SAMPLE_EXCEPTION); + } + return null; + } + + /** + * 예외 처리 예시 로직입니다. + * e를 담아서 반환하는 경우 입니다. 
/**
 * Inbound port for the exception-handling samples.
 */
public interface ExceptionSampleUseCase {
    /** Sample case that fails with an error code only. */
    String failCase1();

    /**
     * Sample case that fails with an error code and an underlying cause.
     *
     * @param arrayNode input value; not used by the implementation visible in this change
     */
    String failCase2(String arrayNode);

}
return errorCode; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/error/ExceptionSampleErrorCode.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/error/ExceptionSampleErrorCode.java new file mode 100644 index 0000000..29872c5 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/exception/error/ExceptionSampleErrorCode.java @@ -0,0 +1,30 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.exception.exception.error; + +import com.likelion.backendplus4.talkpick.batch.common.exception.error.ErrorCode; +import lombok.AllArgsConstructor; +import org.springframework.http.HttpStatus; + +@AllArgsConstructor +public enum ExceptionSampleErrorCode implements ErrorCode { + + SAMPLE_EXCEPTION(HttpStatus.INTERNAL_SERVER_ERROR, 440000, "실패"); + + private final HttpStatus status; + private final int code; + private final String message; + + @Override + public HttpStatus httpStatus() { + return status; + } + + @Override + public int codeNumber() { + return code; + } + + @Override + public String message() { + return message; + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/presentation/ExceptionSampleController.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/presentation/ExceptionSampleController.java new file mode 100644 index 0000000..9094078 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/exception/presentation/ExceptionSampleController.java @@ -0,0 +1,26 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.exception.presentation; + +import com.likelion.backendplus4.talkpick.batch.sample.common.exception.application.ExceptionSampleService; +import lombok.RequiredArgsConstructor; +import org.springframework.web.bind.annotation.GetMapping; +import 
org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequiredArgsConstructor +@RequestMapping("/temp") +public class ExceptionSampleController { + + private final ExceptionSampleService sampleService; + + + @GetMapping("/fail-case1") + public String failCase1() { + return sampleService.failCase1(); + } + + @GetMapping("/fail-case2") + public String failCase2() { + return sampleService.failCase2(null); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingController.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingController.java new file mode 100644 index 0000000..8e40f13 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingController.java @@ -0,0 +1,24 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.logging; + +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogJson; + +import lombok.RequiredArgsConstructor; + +@RestController +@RequestMapping("/testController") +@RequiredArgsConstructor +public class TestLoggingController { + private final TestLoggingService testLoggingService; + + @LogJson + @PostMapping + public String test(@RequestBody TestLoggingRequest testLoggingRequest) { + System.out.println("TestController 요청 성공"); + return testLoggingService.test(testLoggingRequest.getName()); + } +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingRequest.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingRequest.java new file mode 100644 index 
0000000..ca57ca1 --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingRequest.java @@ -0,0 +1,11 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.logging; + +import lombok.Data; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +@Data +public class TestLoggingRequest { + private final String name; + private final int age; +} diff --git a/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingService.java b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingService.java new file mode 100644 index 0000000..4114cdc --- /dev/null +++ b/src/main/java/com/likelion/backendplus4/talkpick/batch/sample/common/logging/TestLoggingService.java @@ -0,0 +1,18 @@ +package com.likelion.backendplus4.talkpick.batch.sample.common.logging; + +import org.springframework.stereotype.Service; + +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.EntryExitLog; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.LogMethodValues; +import com.likelion.backendplus4.talkpick.batch.common.annotation.logging.TimeTracker; + +@Service +public class TestLoggingService { + @EntryExitLog + @LogMethodValues + @TimeTracker + public String test(String text) { + System.out.println("TestService 요청 성공"); + return "bye"; + } +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 896c2ce..13e3221 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -1,7 +1,21 @@ server: - port: ${WEB_PORT:8082} + port: ${BATCH_WEB_PORT:8082} + error: + whitelabel: + enabled: false spring: + elasticsearch: + uris: ${ELS_URI} + ai: + openai: + api-key: ${OPENAI_API_KEY} + embedding-model: text-embedding-3-small + summary: + model: gpt-4.1-nano + maxCompletionTokens: 800 + temperature: 0.5 + application: name: talkpick-batch datasource: @@ -9,6 
+23,12 @@ spring: username: ${MYSQL_USERNAME} password: ${MYSQL_PASSWORD} driver-class-name: com.mysql.cj.jdbc.Driver + hikari: + maximum-pool-size: 20 + minimum-idle: 10 + idle-timeout: 30000 + max-lifetime: 1800000 + connection-timeout: 30000 jpa: database-platform: org.hibernate.dialect.MySQL8Dialect hibernate: @@ -17,7 +37,40 @@ spring: hibernate: format_sql: true open-in-view: false + web: + resources: + add-mappings: false + batch: + job: + enabled: false + jdbc: + initialize-schema: embedded + quartz: + job-store-type: jdbc + jdbc: + initialize-schema: embedded + article-collector: + cron: "0 */1 * * * ?" + article-embedding: + cron: "0 */5 * * * ?" + +log: + rolling: + directory: logs + file-name: talkpick-batch.log + pattern: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - [TraceId: %X{traceId:-no-trace}] - %msg%n" + max-history: 30 + total-size-cap: 10MB + logging: - pattern: - file: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%level] [%thread] [%logger{36}] - %msg%n" - console: "%cyan(%d{yyyy-MM-dd HH:mm:ss.SSS}) %highlight(%-5level) %yellow([%thread]) %green(%logger{36}) - %msg%n" + level: + org.quartz: DEBUG + file: + name: talkpick-batch.log +decorator: + datasource: + p6spy: + enable-logging: false +news: + index: + name: news_index diff --git a/src/main/resources/batch/schema-mysql.sql b/src/main/resources/batch/schema-mysql.sql new file mode 100644 index 0000000..197ef3f --- /dev/null +++ b/src/main/resources/batch/schema-mysql.sql @@ -0,0 +1,98 @@ +-- Autogenerated: do not edit this file + +CREATE TABLE BATCH_JOB_INSTANCE ( + JOB_INSTANCE_ID BIGINT NOT NULL PRIMARY KEY , + VERSION BIGINT , + JOB_NAME VARCHAR(100) NOT NULL, + JOB_KEY VARCHAR(32) NOT NULL, + constraint JOB_INST_UN unique (JOB_NAME, JOB_KEY) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_JOB_EXECUTION ( + JOB_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY , + VERSION BIGINT , + JOB_INSTANCE_ID BIGINT NOT NULL, + CREATE_TIME DATETIME(6) NOT NULL, + START_TIME DATETIME(6) DEFAULT NULL , + END_TIME 
DATETIME(6) DEFAULT NULL , + STATUS VARCHAR(10) , + EXIT_CODE VARCHAR(2500) , + EXIT_MESSAGE VARCHAR(2500) , + LAST_UPDATED DATETIME(6), + constraint JOB_INST_EXEC_FK foreign key (JOB_INSTANCE_ID) + references BATCH_JOB_INSTANCE(JOB_INSTANCE_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_JOB_EXECUTION_PARAMS ( + JOB_EXECUTION_ID BIGINT NOT NULL , + PARAMETER_NAME VARCHAR(100) NOT NULL , + PARAMETER_TYPE VARCHAR(100) NOT NULL , + PARAMETER_VALUE VARCHAR(2500) , + IDENTIFYING CHAR(1) NOT NULL , + constraint JOB_EXEC_PARAMS_FK foreign key (JOB_EXECUTION_ID) + references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_STEP_EXECUTION ( + STEP_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY , + VERSION BIGINT NOT NULL, + STEP_NAME VARCHAR(100) NOT NULL, + JOB_EXECUTION_ID BIGINT NOT NULL, + CREATE_TIME DATETIME(6) NOT NULL, + START_TIME DATETIME(6) DEFAULT NULL , + END_TIME DATETIME(6) DEFAULT NULL , + STATUS VARCHAR(10) , + COMMIT_COUNT BIGINT , + READ_COUNT BIGINT , + FILTER_COUNT BIGINT , + WRITE_COUNT BIGINT , + READ_SKIP_COUNT BIGINT , + WRITE_SKIP_COUNT BIGINT , + PROCESS_SKIP_COUNT BIGINT , + ROLLBACK_COUNT BIGINT , + EXIT_CODE VARCHAR(2500) , + EXIT_MESSAGE VARCHAR(2500) , + LAST_UPDATED DATETIME(6), + constraint JOB_EXEC_STEP_FK foreign key (JOB_EXECUTION_ID) + references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_STEP_EXECUTION_CONTEXT ( + STEP_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY, + SHORT_CONTEXT VARCHAR(2500) NOT NULL, + SERIALIZED_CONTEXT TEXT , + constraint STEP_EXEC_CTX_FK foreign key (STEP_EXECUTION_ID) + references BATCH_STEP_EXECUTION(STEP_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE BATCH_JOB_EXECUTION_CONTEXT ( + JOB_EXECUTION_ID BIGINT NOT NULL PRIMARY KEY, + SHORT_CONTEXT VARCHAR(2500) NOT NULL, + SERIALIZED_CONTEXT TEXT , + constraint JOB_EXEC_CTX_FK foreign key (JOB_EXECUTION_ID) + references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID) +) ENGINE=InnoDB; + +CREATE TABLE 
BATCH_STEP_EXECUTION_SEQ ( + ID BIGINT NOT NULL, + UNIQUE_KEY CHAR(1) NOT NULL, + constraint UNIQUE_KEY_UN unique (UNIQUE_KEY) +) ENGINE=InnoDB; + +INSERT INTO BATCH_STEP_EXECUTION_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_STEP_EXECUTION_SEQ); + +CREATE TABLE BATCH_JOB_EXECUTION_SEQ ( + ID BIGINT NOT NULL, + UNIQUE_KEY CHAR(1) NOT NULL, + constraint UNIQUE_KEY_UN unique (UNIQUE_KEY) +) ENGINE=InnoDB; + +INSERT INTO BATCH_JOB_EXECUTION_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_JOB_EXECUTION_SEQ); + +CREATE TABLE BATCH_JOB_SEQ ( + ID BIGINT NOT NULL, + UNIQUE_KEY CHAR(1) NOT NULL, + constraint UNIQUE_KEY_UN unique (UNIQUE_KEY) +) ENGINE=InnoDB; + +INSERT INTO BATCH_JOB_SEQ (ID, UNIQUE_KEY) select * from (select 0 as ID, '0' as UNIQUE_KEY) as tmp where not exists(select * from BATCH_JOB_SEQ); diff --git a/src/main/resources/quartz/tables_mysql_innodb.sql b/src/main/resources/quartz/tables_mysql_innodb.sql new file mode 100644 index 0000000..8968c23 --- /dev/null +++ b/src/main/resources/quartz/tables_mysql_innodb.sql @@ -0,0 +1,179 @@ +# +# In your Quartz properties file, you'll need to set +# org.quartz.jobStore.driverDelegateClass = org.quartz.impl.jdbcjobstore.StdJDBCDelegate +# +# +# By: Ron Cordell - roncordell +# I didn't see this anywhere, so I thought I'd post it here. This is the script from Quartz to create the tables in a MySQL database, modified to use INNODB instead of MYISAM. 
+ +DROP TABLE IF EXISTS QRTZ_FIRED_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_PAUSED_TRIGGER_GRPS; +DROP TABLE IF EXISTS QRTZ_SCHEDULER_STATE; +DROP TABLE IF EXISTS QRTZ_LOCKS; +DROP TABLE IF EXISTS QRTZ_SIMPLE_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_SIMPROP_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_CRON_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_BLOB_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_TRIGGERS; +DROP TABLE IF EXISTS QRTZ_JOB_DETAILS; +DROP TABLE IF EXISTS QRTZ_CALENDARS; + +CREATE TABLE QRTZ_JOB_DETAILS( +SCHED_NAME VARCHAR(120) NOT NULL, +JOB_NAME VARCHAR(190) NOT NULL, +JOB_GROUP VARCHAR(190) NOT NULL, +DESCRIPTION VARCHAR(250) NULL, +JOB_CLASS_NAME VARCHAR(250) NOT NULL, +IS_DURABLE VARCHAR(1) NOT NULL, +IS_NONCONCURRENT VARCHAR(1) NOT NULL, +IS_UPDATE_DATA VARCHAR(1) NOT NULL, +REQUESTS_RECOVERY VARCHAR(1) NOT NULL, +JOB_DATA BLOB NULL, +PRIMARY KEY (SCHED_NAME,JOB_NAME,JOB_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_TRIGGERS ( +SCHED_NAME VARCHAR(120) NOT NULL, +TRIGGER_NAME VARCHAR(190) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +JOB_NAME VARCHAR(190) NOT NULL, +JOB_GROUP VARCHAR(190) NOT NULL, +DESCRIPTION VARCHAR(250) NULL, +NEXT_FIRE_TIME BIGINT(13) NULL, +PREV_FIRE_TIME BIGINT(13) NULL, +PRIORITY INTEGER NULL, +TRIGGER_STATE VARCHAR(16) NOT NULL, +TRIGGER_TYPE VARCHAR(8) NOT NULL, +START_TIME BIGINT(13) NOT NULL, +END_TIME BIGINT(13) NULL, +CALENDAR_NAME VARCHAR(190) NULL, +MISFIRE_INSTR SMALLINT(2) NULL, +JOB_DATA BLOB NULL, +PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP), +FOREIGN KEY (SCHED_NAME,JOB_NAME,JOB_GROUP) +REFERENCES QRTZ_JOB_DETAILS(SCHED_NAME,JOB_NAME,JOB_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_SIMPLE_TRIGGERS ( +SCHED_NAME VARCHAR(120) NOT NULL, +TRIGGER_NAME VARCHAR(190) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +REPEAT_COUNT BIGINT(7) NOT NULL, +REPEAT_INTERVAL BIGINT(12) NOT NULL, +TIMES_TRIGGERED BIGINT(10) NOT NULL, +PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP), +FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP) 
+REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_CRON_TRIGGERS ( +SCHED_NAME VARCHAR(120) NOT NULL, +TRIGGER_NAME VARCHAR(190) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +CRON_EXPRESSION VARCHAR(120) NOT NULL, +TIME_ZONE_ID VARCHAR(80), +PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP), +FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP) +REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_SIMPROP_TRIGGERS + ( + SCHED_NAME VARCHAR(120) NOT NULL, + TRIGGER_NAME VARCHAR(190) NOT NULL, + TRIGGER_GROUP VARCHAR(190) NOT NULL, + STR_PROP_1 VARCHAR(512) NULL, + STR_PROP_2 VARCHAR(512) NULL, + STR_PROP_3 VARCHAR(512) NULL, + INT_PROP_1 INT NULL, + INT_PROP_2 INT NULL, + LONG_PROP_1 BIGINT NULL, + LONG_PROP_2 BIGINT NULL, + DEC_PROP_1 NUMERIC(13,4) NULL, + DEC_PROP_2 NUMERIC(13,4) NULL, + BOOL_PROP_1 VARCHAR(1) NULL, + BOOL_PROP_2 VARCHAR(1) NULL, + PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP), + FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP) + REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_BLOB_TRIGGERS ( +SCHED_NAME VARCHAR(120) NOT NULL, +TRIGGER_NAME VARCHAR(190) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +BLOB_DATA BLOB NULL, +PRIMARY KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP), +INDEX (SCHED_NAME,TRIGGER_NAME, TRIGGER_GROUP), +FOREIGN KEY (SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP) +REFERENCES QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_CALENDARS ( +SCHED_NAME VARCHAR(120) NOT NULL, +CALENDAR_NAME VARCHAR(190) NOT NULL, +CALENDAR BLOB NOT NULL, +PRIMARY KEY (SCHED_NAME,CALENDAR_NAME)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_PAUSED_TRIGGER_GRPS ( +SCHED_NAME VARCHAR(120) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +PRIMARY KEY (SCHED_NAME,TRIGGER_GROUP)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_FIRED_TRIGGERS ( +SCHED_NAME VARCHAR(120) NOT NULL, 
+ENTRY_ID VARCHAR(95) NOT NULL, +TRIGGER_NAME VARCHAR(190) NOT NULL, +TRIGGER_GROUP VARCHAR(190) NOT NULL, +INSTANCE_NAME VARCHAR(190) NOT NULL, +FIRED_TIME BIGINT(13) NOT NULL, +SCHED_TIME BIGINT(13) NOT NULL, +PRIORITY INTEGER NOT NULL, +STATE VARCHAR(16) NOT NULL, +JOB_NAME VARCHAR(190) NULL, +JOB_GROUP VARCHAR(190) NULL, +IS_NONCONCURRENT VARCHAR(1) NULL, +REQUESTS_RECOVERY VARCHAR(1) NULL, +PRIMARY KEY (SCHED_NAME,ENTRY_ID)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_SCHEDULER_STATE ( +SCHED_NAME VARCHAR(120) NOT NULL, +INSTANCE_NAME VARCHAR(190) NOT NULL, +LAST_CHECKIN_TIME BIGINT(13) NOT NULL, +CHECKIN_INTERVAL BIGINT(13) NOT NULL, +PRIMARY KEY (SCHED_NAME,INSTANCE_NAME)) +ENGINE=InnoDB; + +CREATE TABLE QRTZ_LOCKS ( +SCHED_NAME VARCHAR(120) NOT NULL, +LOCK_NAME VARCHAR(40) NOT NULL, +PRIMARY KEY (SCHED_NAME,LOCK_NAME)) +ENGINE=InnoDB; + +CREATE INDEX IDX_QRTZ_J_REQ_RECOVERY ON QRTZ_JOB_DETAILS(SCHED_NAME,REQUESTS_RECOVERY); +CREATE INDEX IDX_QRTZ_J_GRP ON QRTZ_JOB_DETAILS(SCHED_NAME,JOB_GROUP); + +CREATE INDEX IDX_QRTZ_T_J ON QRTZ_TRIGGERS(SCHED_NAME,JOB_NAME,JOB_GROUP); +CREATE INDEX IDX_QRTZ_T_JG ON QRTZ_TRIGGERS(SCHED_NAME,JOB_GROUP); +CREATE INDEX IDX_QRTZ_T_C ON QRTZ_TRIGGERS(SCHED_NAME,CALENDAR_NAME); +CREATE INDEX IDX_QRTZ_T_G ON QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_GROUP); +CREATE INDEX IDX_QRTZ_T_STATE ON QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_STATE); +CREATE INDEX IDX_QRTZ_T_N_STATE ON QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP,TRIGGER_STATE); +CREATE INDEX IDX_QRTZ_T_N_G_STATE ON QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_GROUP,TRIGGER_STATE); +CREATE INDEX IDX_QRTZ_T_NEXT_FIRE_TIME ON QRTZ_TRIGGERS(SCHED_NAME,NEXT_FIRE_TIME); +CREATE INDEX IDX_QRTZ_T_NFT_ST ON QRTZ_TRIGGERS(SCHED_NAME,TRIGGER_STATE,NEXT_FIRE_TIME); +CREATE INDEX IDX_QRTZ_T_NFT_MISFIRE ON QRTZ_TRIGGERS(SCHED_NAME,MISFIRE_INSTR,NEXT_FIRE_TIME); +CREATE INDEX IDX_QRTZ_T_NFT_ST_MISFIRE ON QRTZ_TRIGGERS(SCHED_NAME,MISFIRE_INSTR,NEXT_FIRE_TIME,TRIGGER_STATE); +CREATE INDEX IDX_QRTZ_T_NFT_ST_MISFIRE_GRP 
ON QRTZ_TRIGGERS(SCHED_NAME,MISFIRE_INSTR,NEXT_FIRE_TIME,TRIGGER_GROUP,TRIGGER_STATE); + +CREATE INDEX IDX_QRTZ_FT_TRIG_INST_NAME ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,INSTANCE_NAME); +CREATE INDEX IDX_QRTZ_FT_INST_JOB_REQ_RCVRY ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,INSTANCE_NAME,REQUESTS_RECOVERY); +CREATE INDEX IDX_QRTZ_FT_J_G ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,JOB_NAME,JOB_GROUP); +CREATE INDEX IDX_QRTZ_FT_JG ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,JOB_GROUP); +CREATE INDEX IDX_QRTZ_FT_T_G ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,TRIGGER_NAME,TRIGGER_GROUP); +CREATE INDEX IDX_QRTZ_FT_TG ON QRTZ_FIRED_TRIGGERS(SCHED_NAME,TRIGGER_GROUP); + +commit;