@@ -50,6 +50,10 @@ public enum ErrorStatus implements BaseErrorCode {
BOOKMARK_NOT_FOUND(HttpStatus.BAD_REQUEST, "BOOKMARK_4005", "북마크가 없습니다."),
BOOKMARK_DUPLICATE(HttpStatus.BAD_REQUEST, "BOOKMARK_4006", "이미 북마크가 되어있습니다."),

// Crawling errors 5000
CRAWLING_NOT_EXIST(HttpStatus.BAD_REQUEST, "CRAWLING_5001", "유효하지 않은 URL 형식입니다."),
CRAWLING_ERROR(HttpStatus.INTERNAL_SERVER_ERROR, "CRAWLING_5002", "크롤링 중 오류가 발생했습니다."),

// Category errors 7000
CATEGORY_NOT_FOUND(HttpStatus.BAD_REQUEST, "CATEGORY_7001", "카테고리가 없습니다."),
// Per-member interest category errors 8000
@@ -17,9 +17,9 @@ public enum SuccessStatus implements BaseCode {
// Schedule generator responses
GENERATOR_OK(HttpStatus.OK, "GENERATOR_3000", "성공입니다."),
// Bookmark responses
BOOKMARK_OK(HttpStatus.OK, "BOOKMARK_4000", "성공입니다.")

;
BOOKMARK_OK(HttpStatus.OK, "BOOKMARK_4000", "성공입니다."),
// Crawling responses
CRAWLING_OK(HttpStatus.OK, "CRAWLING_5000", "성공입니다.");

private final HttpStatus httpStatus;
private final String code;
@@ -0,0 +1,15 @@
package Capstone.AutoScheduler.global.converter;

import Capstone.AutoScheduler.global.web.dto.CrawlingResponseDTO;
import java.util.List;

public class CrawlingConverter {
// Convert the HTML body and the CSS file links into a DTO
public static CrawlingResponseDTO.GetCrawlingResultDTO toGetCrawlingResultDTO(List<String> htmlContent) {
return CrawlingResponseDTO.GetCrawlingResultDTO.builder()
.cssFile(htmlContent.get(0))
.htmlBody(htmlContent.get(1))
.build();
}
}
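
The converter leans on the ordering that WebCrawlerService produces: index 0 carries the stylesheet link tags, index 1 the body markup. A minimal sketch of a regression test for that contract, assuming JUnit 5 is on the test classpath (this test class is hypothetical and not part of the PR):

package Capstone.AutoScheduler.global.converter;

import static org.junit.jupiter.api.Assertions.assertEquals;

import Capstone.AutoScheduler.global.web.dto.CrawlingResponseDTO;
import java.util.List;
import org.junit.jupiter.api.Test;

class CrawlingConverterTest {

    @Test
    void mapsCssLinksToIndexZeroAndBodyToIndexOne() {
        // Same ordering as WebCrawlerService: CSS links first, body second.
        List<String> htmlContent = List.of(
                "<link rel=\"stylesheet\" href=\"https://example.com/a.css\">",
                "<body><h1>Hello</h1></body>");

        CrawlingResponseDTO.GetCrawlingResultDTO dto =
                CrawlingConverter.toGetCrawlingResultDTO(htmlContent);

        assertEquals(htmlContent.get(0), dto.getCssFile());
        assertEquals(htmlContent.get(1), dto.getHtmlBody());
    }
}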

@@ -1,20 +1,28 @@
package Capstone.AutoScheduler.global.service.SeleniumService;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;

import java.net.MalformedURLException;
import java.net.URL;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;

@Service
public class WebCrawlerService {

public String getHtmlContent(String url) {
public List<String> getHtmlContent(String url) {
List<String> htmlContent = new ArrayList<>();
WebDriver driver = null;

try {
@@ -38,8 +46,30 @@ public String getHtmlContent(String url) {
WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(10));
wait.until(ExpectedConditions.presenceOfElementLocated(org.openqa.selenium.By.tagName("body")));

// Return the HTML source
return driver.getPageSource();
// Collect the CSS files linked from the header
String cssFiles = "";
List<WebElement> links = driver.findElements(By.xpath("//link[@rel='stylesheet']"));
for (WebElement link : links) {
String cssLink = link.getAttribute("href");
cssFiles += "<link rel=\"stylesheet\" href=\"" + cssLink + "\">";
}
htmlContent.add(cssFiles);

// Extract only the body of the HTML
String bodyContent = driver.findElement(By.tagName("body")).getAttribute("outerHTML");
htmlContent.add(bodyContent);

return htmlContent;

// // Fetch the HTML source and store it in a variable
// String htmlContent = driver.getPageSource();
//
// // Return the HTML with a MIME type
// HttpHeaders headers = new HttpHeaders();
// headers.add(HttpHeaders.CONTENT_TYPE, "text/html; charset=UTF-8");
//
// // Return the body via ResponseEntity and getBody
// return new ResponseEntity<>(htmlContent, headers, HttpStatus.OK).getBody();
} catch (MalformedURLException e) {
throw new IllegalArgumentException("유효하지 않은 URL 형식입니다: " + url);
} catch (Exception e) {
@@ -1,6 +1,13 @@
package Capstone.AutoScheduler.global.web.controller;

import Capstone.AutoScheduler.global.apiPayload.ApiResponse;
import Capstone.AutoScheduler.global.apiPayload.code.status.ErrorStatus;
import Capstone.AutoScheduler.global.apiPayload.code.status.SuccessStatus;
import Capstone.AutoScheduler.global.converter.CrawlingConverter;
import Capstone.AutoScheduler.global.converter.EventConverter;
import Capstone.AutoScheduler.global.service.SeleniumService.WebCrawlerService;
import Capstone.AutoScheduler.global.web.dto.CrawlingResponseDTO;
import Capstone.AutoScheduler.global.web.dto.Event.EventResponseDTO;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import org.springframework.beans.factory.annotation.Autowired;
@@ -11,6 +18,8 @@
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

import java.util.List;

@RestController
public class CrawlerController {

@@ -22,23 +31,19 @@ public class CrawlerController {
description = "URL을 입력하면 해당 페이지의 HTML 소스를 반환합니다."
)
@GetMapping("/crawl")
public ResponseEntity<String> crawl(
public ApiResponse<CrawlingResponseDTO.GetCrawlingResultDTO> crawl(
@Parameter(description = "크롤링할 URL", required = true)
@RequestParam String url
) {
try {
// Fetch the HTML crawling result
String htmlContent = webCrawlerService.getHtmlContent(url);

// Return the HTML with a MIME type
HttpHeaders headers = new HttpHeaders();
headers.add(HttpHeaders.CONTENT_TYPE, "text/html; charset=UTF-8");
List<String> htmlContent = webCrawlerService.getHtmlContent(url);

return new ResponseEntity<>(htmlContent, headers, HttpStatus.OK);
return ApiResponse.onSuccess(SuccessStatus.CRAWLING_OK, CrawlingConverter.toGetCrawlingResultDTO(htmlContent));
} catch (IllegalArgumentException e) {
return new ResponseEntity<>("유효하지 않은 URL 형식입니다: " + e.getMessage(), HttpStatus.BAD_REQUEST);
return ApiResponse.onFailure(ErrorStatus.CRAWLING_NOT_EXIST.getCode(), ErrorStatus.CRAWLING_NOT_EXIST.getMessage(), null);
} catch (Exception e) {
return new ResponseEntity<>("크롤링 중 오류가 발생했습니다: " + e.getMessage(), HttpStatus.INTERNAL_SERVER_ERROR);
return ApiResponse.onFailure(ErrorStatus.CRAWLING_ERROR.getCode(), ErrorStatus.CRAWLING_ERROR.getMessage(), null);
}
}
}
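
Since the endpoint now answers with an ApiResponse envelope instead of raw text/html, a web-layer slice test can cover the new wiring without launching a browser. A minimal sketch, assuming JUnit 5 plus spring-boot-starter-test (MockMvc and Mockito) and that ApiResponse is serialised as a plain JSON body with HTTP 200; the test class is hypothetical and not part of the PR:

package Capstone.AutoScheduler.global.web.controller;

import static org.mockito.Mockito.when;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;

import Capstone.AutoScheduler.global.service.SeleniumService.WebCrawlerService;
import java.util.List;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.test.web.servlet.MockMvc;

@WebMvcTest(CrawlerController.class)
class CrawlerControllerTest {

    @Autowired
    private MockMvc mockMvc;

    // Stubbed so the test never launches ChromeDriver.
    @MockBean
    private WebCrawlerService webCrawlerService;

    @Test
    void crawlWrapsServiceResultInApiResponse() throws Exception {
        when(webCrawlerService.getHtmlContent("https://example.com"))
                .thenReturn(List.of(
                        "<link rel=\"stylesheet\" href=\"https://example.com/a.css\">",
                        "<body><h1>Hello</h1></body>"));

        mockMvc.perform(get("/crawl").param("url", "https://example.com"))
                .andExpect(status().isOk());
    }
}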
@@ -0,0 +1,20 @@
package Capstone.AutoScheduler.global.web.dto;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;

import java.time.LocalDateTime;

public class CrawlingResponseDTO {
// The HTML body and the CSS file links
@Builder
@Getter
@NoArgsConstructor
@AllArgsConstructor
public static class GetCrawlingResultDTO {
String cssFile;
String htmlBody;
}
}
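
GetCrawlingResultDTO splits the crawled page into the stylesheet links and the body markup, so a consumer that wants to render the result has to stitch the two back together. A sketch of a hypothetical helper (not part of the PR) that only illustrates how the two fields relate:

package Capstone.AutoScheduler.global.web.dto;

public class CrawlingResultAssembler {

    // Rebuilds a minimal renderable page from the DTO: the collected
    // stylesheet link tags go back into <head>, followed by the
    // outerHTML of the original <body>.
    public static String toRenderableHtml(CrawlingResponseDTO.GetCrawlingResultDTO dto) {
        return "<!DOCTYPE html><html><head>"
                + dto.getCssFile()
                + "</head>"
                + dto.getHtmlBody()
                + "</html>";
    }
}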