diff --git a/src/main/java/dgu/newsee/domain/crawlednews/controller/NewsController.java b/src/main/java/dgu/newsee/domain/crawlednews/controller/NewsController.java index af14b12..ef7a2f7 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/controller/NewsController.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/controller/NewsController.java @@ -12,7 +12,7 @@ import org.springframework.web.bind.annotation.*; @RestController -@RequestMapping("/api/url/news") +@RequestMapping("/api/news/url") @RequiredArgsConstructor public class NewsController { diff --git a/src/main/java/dgu/newsee/domain/crawlednews/repository/NewsRepository.java b/src/main/java/dgu/newsee/domain/crawlednews/repository/NewsRepository.java index 04ed6ae..fea9d8f 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/repository/NewsRepository.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/repository/NewsRepository.java @@ -3,6 +3,10 @@ import dgu.newsee.domain.crawlednews.entity.NewsOrigin; import org.springframework.data.jpa.repository.JpaRepository; +import java.util.Optional; + public interface NewsRepository extends JpaRepository { boolean existsByOriginalUrl(String url); + + Optional findByOriginalUrl(String url); } diff --git a/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java b/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java index 8d95452..eb7649f 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java @@ -13,6 +13,8 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import java.util.Optional; + @Service @RequiredArgsConstructor public class NewsService { @@ -26,9 +28,15 @@ public class NewsService { public NewsOrigin crawlAndSave(NewsCrawlRequestDTO request, Long userId) { String url = request.getUrl(); - // 중복 저장 방지 - if (newsRepository.existsByOriginalUrl(url)) { - 
throw new IllegalArgumentException("이미 저장된 뉴스입니다."); + // 사용자 조회 + User user = userRepository.findById(userId) + .orElseThrow(() -> new IllegalArgumentException("사용자를 찾을 수 없습니다.")); + + + // 1. 이미 저장된 뉴스면 바로 반환 + Optional optionalNews = newsRepository.findByOriginalUrl(url); + if (optionalNews.isPresent()) { + return optionalNews.get(); // 안전하게 꺼내기 } try { @@ -52,12 +60,7 @@ public NewsOrigin crawlAndSave(NewsCrawlRequestDTO request, Long userId) { newsOrigin.getId(), NewsStatus.USER_INPUT ); - - // 사용자 조회 - User user = userRepository.findById(userId) - .orElseThrow(() -> new IllegalArgumentException("사용자를 찾을 수 없습니다.")); - - return newsOrigin; + return newsOrigin; } catch (Exception e) { throw new RuntimeException("크롤링 실패: " + e.getMessage()); diff --git a/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java b/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java index 1049a22..7e3b410 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java @@ -2,9 +2,12 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; public class NewsParserUtil { @@ -13,7 +16,34 @@ public static ParsedNews parse(Document doc, String categoryFromCaller, String u String title = doc.select("meta[property=og:title]").attr("content"); // 본문 - String content = doc.select("#dic_area").text(); + // 본문 파싱 (p 태그 우선, 없으면 br 기준으로 직접 파싱) + String content = ""; + + Elements paragraphs = doc.select("#dic_area > p"); + if (!paragraphs.isEmpty()) { + List lines = new ArrayList<>(); + for (Element p : paragraphs) { + String text = p.text().trim(); + if (!text.isEmpty()) lines.add(text); + } + content = String.join("\n", lines); + } else { + // fallback: br 태그 기준으로 수동 파싱 + Element dicArea = 
doc.selectFirst("#dic_area"); + if (dicArea != null) { + StringBuilder builder = new StringBuilder(); + for (var node : dicArea.childNodes()) { + if (node.nodeName().equals("br")) { + builder.append("\n"); + } else { + builder.append(node.toString().replaceAll("<.*?>", "").trim()); + } + } + content = builder.toString().replaceAll("\n{2,}", "\n"); // collapse runs of two or more newlines into one + } + } + + // 출처 String source = doc.select("meta[property=og:article:author]").attr("content"); @@ -51,24 +81,26 @@ public static ParsedNews parse(Document doc, String categoryFromCaller, String u // 카테고리 유추 String category = null; - try { - // 1. 네이버 뉴스일 경우 카테고리 직접 파싱 시도 - Element selected = doc.selectFirst("a.Nitem_link_menu[aria-selected=true]"); - if (selected != null) { - category = selected.text(); // 예: 생활/문화 - } + // 1. For system crawls the caller passes the category in via categoryFromCaller + if (categoryFromCaller != null && !categoryFromCaller.isBlank()) { + category = categoryFromCaller; + } - // 2. 그래도 null이면 백업으로 URL에서 유추 시도 - if (category == null || category.isBlank()) { - category = extractCategoryFromUrl(url); // sid 기반 + // 2. Otherwise (e.g. a user-submitted URL) try the category element in the HTML + if (category == null || category.isBlank()) { + Element selected = doc.selectFirst("a.Nitem_link_menu[aria-selected=true], span.Nitem_link_menu"); + if (selected != null) { + category = selected.text(); } + } - // 3. 여전히 못찾으면 fallback - if (category == null || category.isBlank()) { - category = "기타"; - } + // 3. Still nothing: try the sid= parameter in the URL (null-safe, since the old try/catch no longer guards this) + if ((category == null || category.isBlank()) && url != null && url.contains("sid=")) { + category = extractCategoryFromUrl(url); + } - } catch (Exception e) { + // 4. 
여전히 못 찾으면 fallback + if (category == null || category.isBlank()) { category = "기타"; } diff --git a/src/main/java/dgu/newsee/domain/transformednews/dto/ApiResponse.java b/src/main/java/dgu/newsee/domain/transformednews/dto/ApiResponse.java deleted file mode 100644 index 64fa56d..0000000 --- a/src/main/java/dgu/newsee/domain/transformednews/dto/ApiResponse.java +++ /dev/null @@ -1,18 +0,0 @@ -package dgu.newsee.domain.transformednews.dto; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; - -@Getter -@Builder -@AllArgsConstructor -@NoArgsConstructor -public class ApiResponse { - private String code; - private String message; - private T result; - private boolean success; -} - diff --git a/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java b/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java index 17feeb1..fe8ba5b 100644 --- a/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java +++ b/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java @@ -4,7 +4,8 @@ import dgu.newsee.domain.crawlednews.entity.NewsOrigin; import dgu.newsee.domain.crawlednews.entity.NewsStatus; import dgu.newsee.domain.crawlednews.repository.NewsRepository; -import dgu.newsee.domain.transformednews.dto.ApiResponse; +import dgu.newsee.global.exception.AiServerException; +import dgu.newsee.global.payload.ApiResponse; import dgu.newsee.domain.transformednews.dto.TransformRequestDTO; import dgu.newsee.domain.transformednews.dto.TransformedNewsResponseDTO; import dgu.newsee.domain.transformednews.entity.NewsTransformed; @@ -12,6 +13,7 @@ import dgu.newsee.domain.transformednews.repository.NewsTransformedRepository; import dgu.newsee.domain.words.entity.Word; import dgu.newsee.domain.words.repository.WordRepository; +import dgu.newsee.global.payload.ResponseCode; import lombok.RequiredArgsConstructor; import 
org.springframework.beans.factory.annotation.Value; import org.springframework.core.ParameterizedTypeReference; @@ -79,7 +81,7 @@ public void requestTransformAndSave(Long newsId, String level, NewsStatus status } catch (Exception e) { System.out.println("AI 서버 호출 중 예외 발생: " + e.getMessage()); e.printStackTrace(); - throw new RuntimeException("AI 서버 호출 실패"); + throw new AiServerException(ResponseCode.AI_SERVER_DOWN); } // 응답 로그 출력 diff --git a/src/main/java/dgu/newsee/global/config/SecurityConfig.java b/src/main/java/dgu/newsee/global/config/SecurityConfig.java index 0387ec5..71ead39 100644 --- a/src/main/java/dgu/newsee/global/config/SecurityConfig.java +++ b/src/main/java/dgu/newsee/global/config/SecurityConfig.java @@ -36,7 +36,7 @@ public SecurityFilterChain filterChain(HttpSecurity http) throws Exception { "/swagger-ui/**", "/v3/api-docs/**", "/api/user/kakao", - "/api/url/news", + "/api/news/url", "/api/user/level", "/api/test/crawled-news/**", "/api/news/**" diff --git a/src/main/java/dgu/newsee/global/exception/AiServerException.java b/src/main/java/dgu/newsee/global/exception/AiServerException.java new file mode 100644 index 0000000..dee4027 --- /dev/null +++ b/src/main/java/dgu/newsee/global/exception/AiServerException.java @@ -0,0 +1,10 @@ +package dgu.newsee.global.exception; + +import dgu.newsee.global.payload.BaseErrorCode; +import dgu.newsee.global.payload.ResponseCode; + +public class AiServerException extends GeneralException { + public AiServerException(BaseErrorCode code) { + super(ResponseCode.AI_SERVER_DOWN); + } +} diff --git a/src/main/java/dgu/newsee/global/payload/ApiResponse.java b/src/main/java/dgu/newsee/global/payload/ApiResponse.java index 7a32f4f..c6cc1cf 100644 --- a/src/main/java/dgu/newsee/global/payload/ApiResponse.java +++ b/src/main/java/dgu/newsee/global/payload/ApiResponse.java @@ -3,9 +3,11 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Getter; +import lombok.NoArgsConstructor; @Getter @Builder 
+@NoArgsConstructor @AllArgsConstructor public class ApiResponse { private boolean isSuccess; diff --git a/src/main/java/dgu/newsee/global/payload/ResponseCode.java b/src/main/java/dgu/newsee/global/payload/ResponseCode.java index 557783c..c7e299c 100644 --- a/src/main/java/dgu/newsee/global/payload/ResponseCode.java +++ b/src/main/java/dgu/newsee/global/payload/ResponseCode.java @@ -45,7 +45,8 @@ public enum ResponseCode implements BaseErrorCode { // ✅ 기타 INVALID_REQUEST(ResponseCodeType.ERROR, "REQ400", "잘못된 요청입니다.", HttpStatus.BAD_REQUEST), MISSING_PARAMETER(ResponseCodeType.ERROR, "REQ401", "필수 파라미터가 누락되었습니다.", HttpStatus.BAD_REQUEST), - PARSE_ERROR(ResponseCodeType.ERROR, "REQ402", "데이터 파싱 오류입니다.", HttpStatus.BAD_REQUEST); + PARSE_ERROR(ResponseCodeType.ERROR, "REQ402", "데이터 파싱 오류입니다.", HttpStatus.BAD_REQUEST), + AI_SERVER_DOWN(ResponseCodeType.ERROR, "AI_001", "AI 서버가 응답하지 않습니다.", HttpStatus.SERVICE_UNAVAILABLE); private final ResponseCodeType type; private final String code; diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index d003256..3c5a3b0 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -27,4 +27,4 @@ spring: external: ai: - url: https://4a1efdb53fcb.ngrok-free.app/api/news/transfer \ No newline at end of file + url: https://0b96a22855d0.ngrok-free.app/api/news/transfer \ No newline at end of file