Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.springframework.web.bind.annotation.*;

@RestController
@RequestMapping("/api/url/news")
@RequestMapping("/api/news/url")
@RequiredArgsConstructor
public class NewsController {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import dgu.newsee.domain.crawlednews.entity.NewsOrigin;
import org.springframework.data.jpa.repository.JpaRepository;

import java.util.Optional;

/**
 * Persistence access for {@link NewsOrigin} entities, identified by a {@code Long} id.
 *
 * <p>Inherits the standard CRUD operations from {@link JpaRepository} and adds
 * lookups by the article's original URL.
 * NOTE(review): both methods rely on Spring Data deriving the query from the method
 * name, which presumes {@code NewsOrigin} exposes an {@code originalUrl} property;
 * confirm against the entity.
 */
public interface NewsRepository extends JpaRepository<NewsOrigin, Long> {
/**
 * Checks whether a news record with the given original URL exists.
 *
 * @param url original URL to look for
 * @return {@code true} if a matching record exists, {@code false} otherwise
 */
boolean existsByOriginalUrl(String url);

/**
 * Looks up a news record by its original URL.
 *
 * @param url original URL to look for
 * @return the matching record, or an empty {@link Optional} when none is found
 */
Optional<NewsOrigin> findByOriginalUrl(String url);
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.util.Optional;

@Service
@RequiredArgsConstructor
public class NewsService {
Expand All @@ -26,9 +28,15 @@ public class NewsService {
public NewsOrigin crawlAndSave(NewsCrawlRequestDTO request, Long userId) {
String url = request.getUrl();

// 중복 저장 방지
if (newsRepository.existsByOriginalUrl(url)) {
throw new IllegalArgumentException("이미 저장된 뉴스입니다.");
// 사용자 조회
User user = userRepository.findById(userId)
.orElseThrow(() -> new IllegalArgumentException("사용자를 찾을 수 없습니다."));


// 1. 이미 저장된 뉴스면 바로 반환
Optional<NewsOrigin> optionalNews = newsRepository.findByOriginalUrl(url);
if (optionalNews.isPresent()) {
return optionalNews.get(); // 안전하게 꺼내기
}

try {
Expand All @@ -52,12 +60,7 @@ public NewsOrigin crawlAndSave(NewsCrawlRequestDTO request, Long userId) {
newsOrigin.getId(),
NewsStatus.USER_INPUT
);

// 사용자 조회
User user = userRepository.findById(userId)
.orElseThrow(() -> new IllegalArgumentException("사용자를 찾을 수 없습니다."));

return newsOrigin;
return newsOrigin;

} catch (Exception e) {
throw new RuntimeException("크롤링 실패: " + e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;

public class NewsParserUtil {

Expand All @@ -13,7 +16,34 @@ public static ParsedNews parse(Document doc, String categoryFromCaller, String u
String title = doc.select("meta[property=og:title]").attr("content");

// 본문
String content = doc.select("#dic_area").text();
// 본문 파싱 (p 태그 우선, 없으면 br 기준으로 직접 파싱)
String content = "";

Elements paragraphs = doc.select("#dic_area > p");
if (!paragraphs.isEmpty()) {
List<String> lines = new ArrayList<>();
for (Element p : paragraphs) {
String text = p.text().trim();
if (!text.isEmpty()) lines.add(text);
}
content = String.join("\n", lines);
} else {
// fallback: br 태그 기준으로 수동 파싱
Element dicArea = doc.selectFirst("#dic_area");
if (dicArea != null) {
StringBuilder builder = new StringBuilder();
for (var node : dicArea.childNodes()) {
if (node.nodeName().equals("br")) {
builder.append("\n");
} else {
builder.append(node.toString().replaceAll("<.*?>", "").trim());
}
}
content = builder.toString().replaceAll("\n{2,}", "\n"); // 줄바꿈 2번 이상은 하나로 줄이기
}
}



// 출처
String source = doc.select("meta[property=og:article:author]").attr("content");
Expand Down Expand Up @@ -51,24 +81,26 @@ public static ParsedNews parse(Document doc, String categoryFromCaller, String u
// 카테고리 유추
String category = null;

try {
// 1. 네이버 뉴스일 경우 카테고리 직접 파싱 시도
Element selected = doc.selectFirst("a.Nitem_link_menu[aria-selected=true]");
if (selected != null) {
category = selected.text(); // 예: 생활/문화
}
// 1. 시스템 크롤링이면 외부에서 categoryFromCaller가 전달됨
if (categoryFromCaller != null && !categoryFromCaller.isBlank()) {
category = categoryFromCaller;
}

// 2. 그래도 null이면 백업으로 URL에서 유추 시도
if (category == null || category.isBlank()) {
category = extractCategoryFromUrl(url); // sid 기반
// 2. 사용자 입력 등인 경우 HTML 태그 기반 시도
if (category == null || category.isBlank()) {
Element selected = doc.selectFirst("a.Nitem_link_menu[aria-selected=true], span.Nitem_link_menu");
if (selected != null) {
category = selected.text();
}
}

// 3. 여전히 못찾으면 fallback
if (category == null || category.isBlank()) {
category = "기타";
}
// 3. 그래도 없으면 URL 내 sid= 파싱 시도
if ((category == null || category.isBlank()) && url.contains("sid=")) {
category = extractCategoryFromUrl(url);
}

} catch (Exception e) {
// 4. 여전히 못 찾으면 fallback
if (category == null || category.isBlank()) {
category = "기타";
}

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@
import dgu.newsee.domain.crawlednews.entity.NewsOrigin;
import dgu.newsee.domain.crawlednews.entity.NewsStatus;
import dgu.newsee.domain.crawlednews.repository.NewsRepository;
import dgu.newsee.domain.transformednews.dto.ApiResponse;
import dgu.newsee.global.exception.AiServerException;
import dgu.newsee.global.payload.ApiResponse;
import dgu.newsee.domain.transformednews.dto.TransformRequestDTO;
import dgu.newsee.domain.transformednews.dto.TransformedNewsResponseDTO;
import dgu.newsee.domain.transformednews.entity.NewsTransformed;
import dgu.newsee.domain.transformednews.entity.TransformLevel;
import dgu.newsee.domain.transformednews.repository.NewsTransformedRepository;
import dgu.newsee.domain.words.entity.Word;
import dgu.newsee.domain.words.repository.WordRepository;
import dgu.newsee.global.payload.ResponseCode;
import lombok.RequiredArgsConstructor;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.ParameterizedTypeReference;
Expand Down Expand Up @@ -79,7 +81,7 @@ public void requestTransformAndSave(Long newsId, String level, NewsStatus status
} catch (Exception e) {
System.out.println("AI 서버 호출 중 예외 발생: " + e.getMessage());
e.printStackTrace();
throw new RuntimeException("AI 서버 호출 실패");
throw new AiServerException(ResponseCode.AI_SERVER_DOWN);
}

// 응답 로그 출력
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/dgu/newsee/global/config/SecurityConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public SecurityFilterChain filterChain(HttpSecurity http) throws Exception {
"/swagger-ui/**",
"/v3/api-docs/**",
"/api/user/kakao",
"/api/url/news",
"/api/news/url",
"/api/user/level",
"/api/test/crawled-news/**",
"/api/news/**"
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/dgu/newsee/global/exception/AiServerException.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package dgu.newsee.global.exception;

import dgu.newsee.global.payload.BaseErrorCode;
import dgu.newsee.global.payload.ResponseCode;

/**
 * Exception raised when a call to the external AI server fails.
 *
 * <p>The error code supplied by the caller is handed to {@link GeneralException}.
 */
public class AiServerException extends GeneralException {

    /**
     * Creates the exception with the given error code.
     *
     * @param code error code describing the failure; when {@code null},
     *             {@link ResponseCode#AI_SERVER_DOWN} is used instead
     */
    public AiServerException(BaseErrorCode code) {
        // The argument used to be ignored and AI_SERVER_DOWN was always passed on;
        // forward the caller's code and keep AI_SERVER_DOWN only as the null fallback.
        super(code != null ? code : ResponseCode.AI_SERVER_DOWN);
    }
}
2 changes: 2 additions & 0 deletions src/main/java/dgu/newsee/global/payload/ApiResponse.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;

@Getter
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ApiResponse<T> {
private boolean isSuccess;
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/dgu/newsee/global/payload/ResponseCode.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ public enum ResponseCode implements BaseErrorCode {
// ✅ 기타
INVALID_REQUEST(ResponseCodeType.ERROR, "REQ400", "잘못된 요청입니다.", HttpStatus.BAD_REQUEST),
MISSING_PARAMETER(ResponseCodeType.ERROR, "REQ401", "필수 파라미터가 누락되었습니다.", HttpStatus.BAD_REQUEST),
PARSE_ERROR(ResponseCodeType.ERROR, "REQ402", "데이터 파싱 오류입니다.", HttpStatus.BAD_REQUEST);
PARSE_ERROR(ResponseCodeType.ERROR, "REQ402", "데이터 파싱 오류입니다.", HttpStatus.BAD_REQUEST),
AI_SERVER_DOWN(ResponseCodeType.ERROR, "AI_001", "AI 서버가 응답하지 않습니다.", HttpStatus.SERVICE_UNAVAILABLE);

private final ResponseCodeType type;
private final String code;
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ spring:

external:
ai:
url: https://4a1efdb53fcb.ngrok-free.app/api/news/transfer
url: https://0b96a22855d0.ngrok-free.app/api/news/transfer