Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package dgu.newsee.domain.news.controller;
package dgu.newsee.domain.crawlednews.controller;

import dgu.newsee.domain.news.dto.NewsCrawlRequestDTO;
import dgu.newsee.domain.news.dto.NewsCrawlResponseDTO;
import dgu.newsee.domain.news.entity.News;
import dgu.newsee.domain.news.service.NewsService;
import dgu.newsee.domain.crawlednews.dto.NewsCrawlRequestDTO;
import dgu.newsee.domain.crawlednews.dto.NewsCrawlResponseDTO;
import dgu.newsee.domain.crawlednews.entity.NewsOrigin;
import dgu.newsee.domain.crawlednews.service.NewsService;
import dgu.newsee.global.payload.ApiResponse;
import dgu.newsee.global.payload.ResponseCode;
import dgu.newsee.global.security.CustomUserDetails;
Expand All @@ -30,10 +30,10 @@ public ApiResponse<?> crawlNews(

try {
Long userId = userDetails.getUserId();
News news = newsService.crawlAndSave(request, userId);
NewsOrigin newsOrigin = newsService.crawlAndSave(request, userId);

return ApiResponse.success(
new NewsCrawlResponseDTO(news),
new NewsCrawlResponseDTO(newsOrigin),
ResponseCode.COMMON_SUCCESS
);
} catch (IllegalArgumentException e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package dgu.newsee.domain.news.dto;
package dgu.newsee.domain.crawlednews.dto;

import lombok.Getter;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package dgu.newsee.domain.crawlednews.dto;

import dgu.newsee.domain.crawlednews.entity.NewsOrigin;
import lombok.Getter;

import java.time.LocalDateTime;

/**
 * Response payload returned by the crawl endpoint after an article has been
 * crawled and persisted. Flattens the saved {@code NewsOrigin} entity into a
 * plain DTO for serialization.
 */
@Getter
public class NewsCrawlResponseDTO {
    // All fields are assigned exactly once in the constructor, so they are
    // declared final to make the DTO immutable. Lombok @Getter still
    // generates the same accessors, so the external interface is unchanged.
    private final String title;
    private final String content;
    private final String imageUrl;
    private final String category;
    private final String source;
    private final LocalDateTime time;
    private final Long newsId;

    /**
     * Copies the exposed fields out of a persisted entity.
     *
     * @param newsOrigin the saved news entity; expected to be already persisted,
     *                   since its database id is exposed as {@code newsId}
     */
    public NewsCrawlResponseDTO(NewsOrigin newsOrigin) {
        this.title = newsOrigin.getTitle();
        this.content = newsOrigin.getContent();
        this.imageUrl = newsOrigin.getImageUrl();
        this.category = newsOrigin.getCategory();
        this.source = newsOrigin.getSource();
        this.time = newsOrigin.getTime();
        this.newsId = newsOrigin.getId();
    }
}

Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
package dgu.newsee.domain.crawlednews.entity;

import dgu.newsee.global.common.BaseEntity;
import jakarta.persistence.*;
import lombok.*;
Expand All @@ -11,14 +10,18 @@
@NoArgsConstructor(access = AccessLevel.PROTECTED)
@AllArgsConstructor
@Builder
public class CrawledNews extends BaseEntity {
public class NewsOrigin extends BaseEntity {

@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
private Long id;

private String title;

@Column(length = 1024)
private String imageUrl;


@Lob
private String content;

Expand All @@ -29,4 +32,7 @@ public class CrawledNews extends BaseEntity {
private LocalDateTime time;

private String originalUrl;

@Enumerated(EnumType.STRING) // DB에는 USER_INPUT, AUTO_CRAWLED로 저장됨
private NewsStatus status;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package dgu.newsee.domain.crawlednews.entity;

/**
 * How a news article entered the system. Persisted as the enum name
 * (the entity field is annotated {@code @Enumerated(EnumType.STRING)}),
 * so these constant names are part of the database contract — do not rename.
 */
public enum NewsStatus {
    USER_INPUT, // article URL submitted manually by a user
    AUTO_CRAWLED // article collected by the automatic crawler
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package dgu.newsee.domain.crawlednews.repository;

import dgu.newsee.domain.crawlednews.entity.CrawledNews;
import dgu.newsee.domain.crawlednews.entity.NewsOrigin;
import org.springframework.data.jpa.repository.JpaRepository;

public interface CrawledNewsRepository extends JpaRepository<CrawledNews, Long> {
/**
 * Spring Data JPA repository for {@code NewsOrigin} rows.
 */
public interface NewsRepository extends JpaRepository<NewsOrigin, Long> {
    /**
     * True when an article with this original URL is already stored.
     * Used by the crawl services to skip duplicate saves.
     */
    boolean existsByOriginalUrl(String url);
}
Original file line number Diff line number Diff line change
@@ -1,41 +1,53 @@
package dgu.newsee.domain.crawlednews.service;

import dgu.newsee.domain.crawlednews.entity.CrawledNews;
import dgu.newsee.domain.crawlednews.repository.CrawledNewsRepository;
import dgu.newsee.domain.crawlednews.entity.NewsOrigin;
import dgu.newsee.domain.crawlednews.entity.NewsStatus;
import dgu.newsee.domain.crawlednews.repository.NewsRepository;
import dgu.newsee.domain.crawlednews.util.CrawledNewsCrawler;
import dgu.newsee.domain.crawlednews.util.CrawledNewsResult;
import dgu.newsee.domain.crawlednews.util.ParsedNews;
import dgu.newsee.domain.transformednews.service.TransformedNewsService;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.util.List;

@Service
@RequiredArgsConstructor
public class CrawledNewsService {

private final CrawledNewsCrawler crawler;
private final CrawledNewsRepository repository;
private final NewsRepository newsRepository;
private final TransformedNewsService transformedNewsService;


@Transactional
public void crawlAndSave(String url, String category) {
String normalizedUrl = url.replace("/comment", "").split("\\?")[0];

if (repository.existsByOriginalUrl(normalizedUrl)) {
if (newsRepository.existsByOriginalUrl(normalizedUrl)) {
System.out.println("중복된 뉴스 URL → 저장하지 않음: " + normalizedUrl);
return;
}

try {
CrawledNewsResult result = crawler.crawl(normalizedUrl, category);
CrawledNews news = CrawledNews.builder()
ParsedNews result = crawler.crawl(normalizedUrl, category);

NewsOrigin news = NewsOrigin.builder()
.title(result.getTitle())
.content(result.getContent())
.imageUrl((result.getImageUrl()))
.category(result.getCategory())
.source(result.getSource())
.time(result.getTime())
.originalUrl(normalizedUrl)
.status(NewsStatus.AUTO_CRAWLED)
.build();
repository.save(news);
newsRepository.save(news);
System.out.println("크롤링 및 저장 완료: " + normalizedUrl);

transformedNewsService.requestTransformAndSaveAllLevels(news.getId(), NewsStatus.AUTO_CRAWLED);

} catch (Exception e) {
System.err.println("크롤링 실패: " + normalizedUrl + " → " + e.getMessage());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
package dgu.newsee.domain.news.service;
package dgu.newsee.domain.crawlednews.service;

import dgu.newsee.domain.news.dto.NewsCrawlRequestDTO;
import dgu.newsee.domain.news.entity.News;
import dgu.newsee.domain.news.repository.NewsRepository;
import dgu.newsee.domain.news.util.NewsCrawlResult;
import dgu.newsee.domain.news.util.NewsCrawler;
import dgu.newsee.domain.crawlednews.dto.NewsCrawlRequestDTO;
import dgu.newsee.domain.crawlednews.entity.NewsOrigin;
import dgu.newsee.domain.crawlednews.entity.NewsStatus;
import dgu.newsee.domain.crawlednews.repository.NewsRepository;
import dgu.newsee.domain.crawlednews.util.NewsCrawler;
import dgu.newsee.domain.crawlednews.util.ParsedNews;
import dgu.newsee.domain.transformednews.service.TransformedNewsService;
import dgu.newsee.domain.user.entity.User;
import dgu.newsee.domain.user.repository.UserRepository;
import dgu.newsee.domain.news.entity.SavedNews;
import dgu.newsee.domain.news.repository.SavedNewsRepository;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
Expand All @@ -20,10 +20,10 @@ public class NewsService {
private final NewsCrawler crawler;
private final NewsRepository newsRepository;
private final UserRepository userRepository;
private final SavedNewsRepository savedNewsRepository;
private final TransformedNewsService transformedService;

@Transactional
public News crawlAndSave(NewsCrawlRequestDTO request, Long userId) {
public NewsOrigin crawlAndSave(NewsCrawlRequestDTO request, Long userId) {
String url = request.getUrl();

// 중복 저장 방지
Expand All @@ -33,31 +33,31 @@ public News crawlAndSave(NewsCrawlRequestDTO request, Long userId) {

try {
// 뉴스 크롤링
NewsCrawlResult result = crawler.crawl(url);
ParsedNews result = crawler.crawl(url);

// News 객체 저장
News news = News.builder()
NewsOrigin newsOrigin = NewsOrigin.builder()
.title(result.getTitle())
.content(result.getContent())
.imageUrl(result.getImageUrl())
.category(result.getCategory())
.source(result.getSource())
.time(result.getTime())
.originalUrl(url)
.status(NewsStatus.USER_INPUT)
.build();
newsRepository.save(news);
newsRepository.save(newsOrigin);

transformedService.requestTransformAndSaveAllLevels(
newsOrigin.getId(),
NewsStatus.USER_INPUT
);

// 사용자 조회
User user = userRepository.findById(userId)
.orElseThrow(() -> new IllegalArgumentException("사용자를 찾을 수 없습니다."));

// 사용자와 뉴스 연결 (SavedNews 테이블에 저장)
SavedNews savedNews = SavedNews.builder()
.user(user)
.news(news)
.build();
savedNewsRepository.save(savedNews);

return news;
return newsOrigin;

} catch (Exception e) {
throw new RuntimeException("크롤링 실패: " + e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,12 @@
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;

@Component
public class CrawledNewsCrawler {

public CrawledNewsResult crawl(String url, String category) throws IOException {
public ParsedNews crawl(String url, String category) throws IOException {
Document doc = Jsoup.connect(url).get();

String title = doc.select("meta[property=og:title]").attr("content");
String content = doc.select("#dic_area").text();
String source = doc.select("meta[property=og:article:author]").attr("content");
if (source.isBlank()) {
source = doc.select("meta[property=og:site_name]").attr("content");
}

String rawTime = doc.select("meta[property=og:article:published_time]").attr("content");
LocalDateTime time;
try {
time = LocalDateTime.parse(rawTime, DateTimeFormatter.ISO_OFFSET_DATE_TIME);
} catch (Exception e) {
time = LocalDateTime.now();
}

return new CrawledNewsResult(title, content, category, source, time, url);
return NewsParserUtil.parse(doc, category, url); // 카테고리는 호출하는 쪽에서 지정
}
}
16 changes: 16 additions & 0 deletions src/main/java/dgu/newsee/domain/crawlednews/util/NewsCrawler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package dgu.newsee.domain.crawlednews.util;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.stereotype.Component;

import java.io.IOException;

/**
 * Crawler used for user-submitted URLs. Fetches the page with Jsoup and
 * hands it to {@code NewsParserUtil}; no category is supplied here
 * (passed as {@code null}), in contrast to {@code CrawledNewsCrawler}.
 */
@Component
public class NewsCrawler {

    /**
     * Downloads the page at {@code url} and parses it into a {@code ParsedNews}.
     *
     * @param url article URL to fetch
     * @return the parsed news fields
     * @throws IOException if the HTTP fetch fails
     */
    public ParsedNews crawl(String url) throws IOException {
        Document page = Jsoup.connect(url).get();
        ParsedNews parsed = NewsParserUtil.parse(page, null, url);
        return parsed;
    }
}
Loading