🔀 Merge pull request #83 from BLink-Org/feat/76

Feat: #76 링크 정보 추출 방식 개선
BLink-Org · Oct 27, 2024 · 67ae659 · 67ae659
2 parents 8e5ded3 + b4b7260
commit 67ae659
Show file tree

Hide file tree

Showing 8 changed files with 458 additions and 316 deletions.
diff --git a/.github/workflows/dev_ci-cd.yml b/.github/workflows/dev_ci-cd.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - dev
+      - feat/76
   workflow_dispatch:
 
 jobs:

diff --git a/build.gradle b/build.gradle
@@ -52,6 +52,15 @@ dependencies {
 
 	implementation 'io.sentry:sentry-spring-boot-starter-jakarta:7.14.0'
 
+	implementation 'com.google.api-client:google-api-client:1.33.0'
+	implementation 'com.google.apis:google-api-services-youtube:v3-rev222-1.25.0'
+	implementation 'com.google.oauth-client:google-oauth-client-jetty:1.33.1'
+	implementation 'com.google.api-client:google-api-client-jackson2:2.4.0'
+	implementation 'com.fasterxml.jackson.core:jackson-core:2.15.2'
+	implementation 'com.fasterxml.jackson.core:jackson-databind:2.15.2'
+	implementation 'com.fasterxml.jackson.core:jackson-annotations:2.15.2'
+
+
 	developmentOnly("org.springframework.boot:spring-boot-docker-compose")
 	testAndDevelopmentOnly("org.springframework.boot:spring-boot-docker-compose")
 

diff --git a/src/main/java/cmc/blink/domain/link/business/DefaultLinkInfoExtractor.java b/src/main/java/cmc/blink/domain/link/business/DefaultLinkInfoExtractor.java
@@ -0,0 +1,323 @@
+package cmc.blink.domain.link.business;
+
+import cmc.blink.domain.link.presentation.dto.LinkResponse;
+import cmc.blink.global.exception.LinkException;
+import cmc.blink.global.exception.constant.ErrorCode;
+import cmc.blink.global.util.opengraph.OpenGraph;
+import com.google.api.client.json.gson.GsonFactory;
+import com.google.api.services.youtube.YouTube;
+import com.google.api.services.youtube.YouTubeRequestInitializer;
+import com.google.api.services.youtube.model.Video;
+import com.google.api.services.youtube.model.VideoListResponse;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+import org.springframework.web.util.HtmlUtils;
+
+import java.io.IOException;
+import java.net.ProtocolException;
+import java.net.URL;
+import java.net.UnknownHostException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+import java.util.Random;
+
+@Component
+public class DefaultLinkInfoExtractor implements LinkInfoExtractor {
+
+    /**
+     * Desktop browser User-Agent strings. One entry is chosen at random by
+     * {@code getRandomUserAgent()} for the plain Jsoup request.
+     * Immutable ({@link List#of}) because it is a shared static constant.
+     */
+    private static final List<String> USER_AGENT_LIST = List.of(
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
+            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
+            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15"
+    );
+
+    /** API key handed to the YouTube client; injected from the {@code gcp.api-key} property. */
+    @Value("${gcp.api-key}")
+    private String apiKey;
+
+    /**
+     * Dispatches to the extraction strategy registered for {@code domain}; any domain without a
+     * dedicated strategy goes through the generic Open Graph based extraction.
+     *
+     * @param domain domain string used to select the strategy (matched exactly against the cases below)
+     * @param url    link to extract information from
+     * @return the extracted link information
+     * @throws Exception whatever the selected strategy propagates
+     */
+    @Override
+    public LinkResponse.LinkInfo extractInfo(String domain, String url) throws Exception {
+        final LinkResponse.LinkInfo linkInfo;
+        switch (domain) {
+            case "youtu.be", "youtube.com" -> linkInfo = fetchYoutubeLinkInfo(url);
+            case "instagram.com" -> linkInfo = fetchInstagramLinkInfo(url);
+            case "blog.naver.com" -> linkInfo = fetchNaverBlogLinkInfo(url);
+            case "cafe.naver.com" -> linkInfo = fetchNaverCafeLinkInfo(url);
+            case "x.com" -> linkInfo = fetchTwitterLinkInfo(url);
+            case "brunch.co.kr" -> linkInfo = fetchBrunchLinkInfo(url);
+            default -> linkInfo = fetchLinkInfo(url);
+        }
+        return linkInfo;
+    }
+
+    /**
+     * Looks the video up through the YouTube Data API (part {@code snippet}) and maps its title,
+     * channel title, description and default thumbnail to link information.
+     *
+     * <p>Failures are reported as {@link LinkException}, like the other fetchers in this class,
+     * instead of printing a stack trace and returning {@code null}.
+     *
+     * @param url YouTube link (watch, short, embed or youtu.be form)
+     * @return link information with type {@code "Youtube"}
+     * @throws LinkException {@code INVALID_LINK_URL} when no video id can be extracted or the API
+     *                       returns no video for it; {@code LINK_SCRAPED_FAILED} when the API call
+     *                       itself fails
+     */
+    private LinkResponse.LinkInfo fetchYoutubeLinkInfo(String url) {
+        String videoId = extractYoutubeVideoId(url);
+
+        try {
+            YouTube youtube = new YouTube.Builder(
+                    new com.google.api.client.http.javanet.NetHttpTransport(),
+                    GsonFactory.getDefaultInstance(),
+                    null
+            ).setYouTubeRequestInitializer(new YouTubeRequestInitializer(apiKey))
+                    .setApplicationName("youtube-data-api-example")
+                    .build();
+
+            VideoListResponse response = youtube.videos()
+                    .list("snippet")
+                    .setId(videoId)
+                    .execute();
+
+            List<Video> videoList = response.getItems();
+            if (videoList == null || videoList.isEmpty()) {
+                throw new LinkException(ErrorCode.INVALID_LINK_URL);
+            }
+
+            var snippet = videoList.get(0).getSnippet();
+            if (snippet == null) {
+                throw new LinkException(ErrorCode.LINK_SCRAPED_FAILED);
+            }
+
+            // A missing thumbnail must not fail the whole extraction; fall back to an empty URL.
+            var thumbnails = snippet.getThumbnails();
+            String thumbnailUrl = (thumbnails != null && thumbnails.getDefault() != null)
+                    ? thumbnails.getDefault().getUrl()
+                    : "";
+
+            String contents = String.format("%s | %s", snippet.getChannelTitle(), snippet.getDescription());
+
+            return LinkMapper.toLinkInfo(snippet.getTitle(), "Youtube", contents, thumbnailUrl);
+        } catch (IOException e) {
+            // Transport/API failure. LinkException is only ever built from an ErrorCode in this
+            // file, so the cause cannot be attached here.
+            throw new LinkException(ErrorCode.LINK_SCRAPED_FAILED);
+        }
+    }
+
+    /**
+     * Extracts the video id from a YouTube link.
+     *
+     * <p>Supported forms: {@code youtu.be/ID}, {@code youtube.com/shorts/ID},
+     * {@code youtube.com/live/ID}, {@code ...?v=ID} and {@code .../embed/ID}. The id is cut at the
+     * next {@code /}, {@code ?}, {@code #} or {@code &}, so trailing slashes, fragments and extra
+     * parameters never leak into it.
+     *
+     * @param url link to inspect
+     * @return the non-empty video id
+     * @throws LinkException {@code INVALID_LINK_URL} when the link is blank, malformed, or carries no id
+     */
+    private String extractYoutubeVideoId(String url) throws LinkException {
+        if (url == null || url.isBlank()) {
+            throw new LinkException(ErrorCode.INVALID_LINK_URL);
+        }
+
+        // Path markers only count before the query/fragment, so "...&feature=youtu.be/" is not
+        // mistaken for a short link.
+        String beforeQuery = url.split("[?#]", 2)[0];
+        for (String marker : List.of("youtu.be/", "youtube.com/shorts/", "youtube.com/live/")) {
+            String candidate = youtubeSegmentAfter(beforeQuery, marker);
+            if (!candidate.isEmpty()) {
+                return candidate;
+            }
+        }
+
+        try {
+            URL videoUrl = new URL(url);
+
+            String query = videoUrl.getQuery();
+            if (query != null) {
+                for (String param : query.split("&")) {
+                    // length check: an empty "v=" is treated as "no id" rather than an index error
+                    if (param.startsWith("v=") && param.length() > 2) {
+                        return param.substring(2);
+                    }
+                }
+            }
+
+            String embedded = youtubeSegmentAfter(videoUrl.getPath(), "/embed/");
+            if (!embedded.isEmpty()) {
+                return embedded;
+            }
+        } catch (IOException e) {
+            // MalformedURLException (an IOException) from new URL(...)
+            throw new LinkException(ErrorCode.INVALID_LINK_URL);
+        }
+        throw new LinkException(ErrorCode.INVALID_LINK_URL);
+    }
+
+    /** Returns the text following {@code marker} up to the next URL delimiter, or "" if absent. */
+    private static String youtubeSegmentAfter(String text, String marker) {
+        int start = text.indexOf(marker);
+        if (start < 0) {
+            return "";
+        }
+        start += marker.length();
+        int end = start;
+        while (end < text.length() && "/?#&".indexOf(text.charAt(end)) < 0) {
+            end++;
+        }
+        return text.substring(start, end);
+    }
+
+    private LinkResponse.LinkInfo fetchInstagramLinkInfo(String url) {
+        try {
+            OpenGraph openGraph = new OpenGraph(url, true);
+
+            String type = getOpenGraphContent(openGraph, "site_name");
+
+            if (type.isEmpty()) {
+                type = "Instagram Profile";
+            }
+
+            String title = getOpenGraphContent(openGraph, "title");
+            int titleIndex = title.indexOf("on Instagram: ");
+            if (titleIndex != -1 && title.length() > titleIndex + "on Instagram: ".length()) {
+                title = title.substring(titleIndex + "on Instagram: ".length()).trim();
+            } else if (type.equals("Instagram Profile")) {
+                title = title.trim();
+            } else {
+                title = "";
+            }
+
+            String contents = getOpenGraphContent(openGraph, "description");
+            int contentIndex = contents.indexOf(": ");
+            if (contentIndex != -1 && contents.length() > contentIndex + 2) {
+                contents = contents.substring(contentIndex + 2).trim();
+            } else if (type.equals("Instagram Profile")) {
+                contents = contents.trim();
+            } else {
+                contents = "";
+            }
+
+            String imageUrl = getOpenGraphContent(openGraph, "image");
+
+            return LinkMapper.toLinkInfo(title, type, contents, imageUrl);
+        } catch (Exception e) {
+            throw new LinkException(ErrorCode.LINK_SCRAPED_FAILED);
+        }
+    }
+
+    private LinkResponse.LinkInfo fetchNaverBlogLinkInfo(String url) {
+        try {
+            Document doc = Jsoup.connect(url).get();
+
+            Element iframe = doc.selectFirst("iframe#mainFrame");
+            if (iframe == null) {
+                throw new LinkException(ErrorCode.LINK_SCRAPED_FAILED);
+            }
+            String postUrl = "https://blog.naver.com" + iframe.attr("src");
+
+            Document postDoc = Jsoup.connect(postUrl).get();
+
+            String title = postDoc.title();
+
+            String contents = postDoc.select(".se-main-container").text();
+            if (contents.length() > 300) {
+                contents = contents.substring(0, 300);
+            }
+            Elements images = postDoc.select(".se-main-container img");
+            String imageUrl = "";
+            for (Element img : images) {
+                imageUrl = img.attr("src");
+                break;
+            }
+
+            return LinkMapper.toLinkInfo(title, "Naver", contents, imageUrl);
+        } catch (IOException e) {
+            throw new LinkException(ErrorCode.LINK_SCRAPED_FAILED);
+        }
+    }
+
+    /**
+     * Reads a Naver cafe page's meta tags: {@code og:title} (falling back to the document title),
+     * {@code og:description} (falling back to the {@code description} meta tag) and
+     * {@code og:image}.
+     *
+     * @param url Naver cafe link
+     * @return link information with type {@code "Naver"}
+     * @throws LinkException {@code LINK_SCRAPED_FAILED} when the page cannot be fetched
+     */
+    private LinkResponse.LinkInfo fetchNaverCafeLinkInfo(String url) {
+        try {
+            Document page = Jsoup.connect(url).get();
+
+            String ogTitle = page.select("meta[property=og:title]").attr("content");
+            String title = ogTitle.isEmpty() ? page.title() : ogTitle;
+
+            String ogDescription = page.select("meta[property=og:description]").attr("content");
+            String contents = ogDescription.isEmpty()
+                    ? page.select("meta[name=description]").attr("content")
+                    : ogDescription;
+
+            String imageUrl = page.select("meta[property=og:image]").attr("content");
+
+            return LinkMapper.toLinkInfo(title, "Naver", contents, imageUrl);
+        } catch (IOException e) {
+            throw new LinkException(ErrorCode.LINK_SCRAPED_FAILED);
+        }
+    }
+
+    /**
+     * Reads the meta tags of an x.com page, following redirects. Each value falls back when its
+     * Open Graph tag is empty: title to the document title, type to {@code "Twitter"},
+     * description to the {@code description} meta tag.
+     *
+     * @param url x.com link
+     * @return the extracted link information
+     * @throws LinkException {@code LINK_SCRAPED_FAILED} when the page cannot be fetched
+     */
+    private LinkResponse.LinkInfo fetchTwitterLinkInfo(String url) {
+        try {
+            Document page = Jsoup.connect(url)
+                    .followRedirects(true)
+                    .get();
+
+            String ogTitle = page.select("meta[property=og:title]").attr("content");
+            String title = ogTitle.isEmpty() ? page.title() : ogTitle;
+
+            String siteName = page.select("meta[property=og:site_name]").attr("content");
+            String type = siteName.isEmpty() ? "Twitter" : siteName;
+
+            String ogDescription = page.select("meta[property=og:description]").attr("content");
+            String contents = ogDescription.isEmpty()
+                    ? page.select("meta[name=description]").attr("content")
+                    : ogDescription;
+
+            String imageUrl = page.select("meta[property=og:image]").attr("content");
+
+            return LinkMapper.toLinkInfo(title, type, contents, imageUrl);
+        } catch (IOException e) {
+            throw new LinkException(ErrorCode.LINK_SCRAPED_FAILED);
+        }
+    }
+
+    /**
+     * Reads a Brunch page's meta tags: {@code og:title} (falling back to the document title),
+     * {@code og:description} (falling back to the {@code description} meta tag) and
+     * {@code og:image}.
+     *
+     * @param url Brunch link
+     * @return link information with type {@code "Brunch"}
+     * @throws LinkException {@code LINK_SCRAPED_FAILED} when the page cannot be fetched
+     */
+    private LinkResponse.LinkInfo fetchBrunchLinkInfo(String url) {
+        try {
+            Document doc = Jsoup.connect(url).get();
+
+            String title = doc.select("meta[property=og:title]").attr("content");
+            if (title.isEmpty()) {
+                title = doc.title();
+            }
+
+            String contents = doc.select("meta[property=og:description]").attr("content");
+            if (contents.isEmpty()) {
+                contents = doc.select("meta[name=description]").attr("content");
+            }
+
+            String imageUrl = doc.select("meta[property=og:image]").attr("content");
+
+            return LinkMapper.toLinkInfo(title, "Brunch", contents, imageUrl);
+        } catch (IOException e) {
+            throw new LinkException(ErrorCode.LINK_SCRAPED_FAILED);
+        }
+    }
+
+    /**
+     * Returns the HTML-unescaped content of an Open Graph property, or an empty string when the
+     * property has no content.
+     *
+     * @param openGraph parsed Open Graph data
+     * @param property  property name passed to {@code openGraph.getContent}, e.g. {@code "title"}
+     * @return the unescaped content, never {@code null}
+     */
+    private String getOpenGraphContent(OpenGraph openGraph, String property) {
+        String rawContent = openGraph.getContent(property);
+        if (rawContent == null) {
+            return "";
+        }
+        return HtmlUtils.htmlUnescape(rawContent);
+    }
+
+
+    /**
+     * Generic extraction: uses the page's Open Graph properties and falls back to
+     * {@code fetchLinkInfoWithJsoup} when the page exposes none or when reading them fails with a
+     * {@link ProtocolException}.
+     *
+     * @param url link to extract information from
+     * @return the extracted link information
+     * @throws LinkException {@code INVALID_LINK_URL} when the host cannot be resolved
+     * @throws Exception     any other failure from the Open Graph or Jsoup request
+     */
+    private LinkResponse.LinkInfo fetchLinkInfo(String url) throws Exception {
+        try {
+            OpenGraph og = new OpenGraph(url, true);
+
+            boolean hasOpenGraphTags = og.getProperties().length > 0;
+            if (hasOpenGraphTags) {
+                return LinkMapper.toLinkInfo(
+                        getOpenGraphContent(og, "title"),
+                        getOpenGraphContent(og, "site_name"),
+                        getOpenGraphContent(og, "description"),
+                        getOpenGraphContent(og, "image"));
+            }
+
+            return fetchLinkInfoWithJsoup(url);
+        } catch (UnknownHostException e) {
+            throw new LinkException(ErrorCode.INVALID_LINK_URL);
+        } catch (ProtocolException e) {
+            return fetchLinkInfoWithJsoup(url);
+        }
+    }
+
+    /**
+     * Fetches the page with Jsoup (random User-Agent, any content type accepted, redirects not
+     * followed) and builds link information from the document title and the {@code type},
+     * {@code description} and {@code og:image} meta tags.
+     *
+     * @param url link to fetch
+     * @return the extracted link information
+     * @throws IOException when the request fails
+     */
+    private LinkResponse.LinkInfo fetchLinkInfoWithJsoup(String url) throws IOException {
+        Document page = Jsoup.connect(url)
+                .userAgent(getRandomUserAgent())
+                .ignoreContentType(true)
+                .followRedirects(false)
+                .get();
+
+        return LinkMapper.toLinkInfo(
+                page.title(),
+                page.select("meta[name=type]").attr("content"),
+                page.select("meta[name=description]").attr("content"),
+                page.select("meta[property=og:image]").attr("content"));
+    }
+
+    /**
+     * Picks one entry of {@code USER_AGENT_LIST} at random.
+     *
+     * @return a User-Agent header value
+     */
+    private static String getRandomUserAgent() {
+        // ThreadLocalRandom avoids allocating and seeding a new Random on every call.
+        int index = java.util.concurrent.ThreadLocalRandom.current().nextInt(USER_AGENT_LIST.size());
+        return USER_AGENT_LIST.get(index);
+    }
+}
diff --git a/src/main/java/cmc/blink/domain/link/business/LinkInfoExtractor.java b/src/main/java/cmc/blink/domain/link/business/LinkInfoExtractor.java
@@ -0,0 +1,7 @@
+package cmc.blink.domain.link.business;
+
+import cmc.blink.domain.link.presentation.dto.LinkResponse;
+
+/**
+ * Extracts the displayable information of a link (returned as {@link LinkResponse.LinkInfo}).
+ */
+public interface LinkInfoExtractor {
+    /**
+     * Extracts the information for {@code url}.
+     *
+     * @param domain domain of the link; implementations may use it to choose how to extract
+     * @param url    the link itself
+     * @return the extracted link information
+     * @throws Exception when extraction fails
+     */
+    LinkResponse.LinkInfo extractInfo(String domain, String url) throws Exception;
+}
diff --git a/src/main/java/cmc/blink/domain/link/business/LinkMapper.java b/src/main/java/cmc/blink/domain/link/business/LinkMapper.java
@@ -10,6 +10,7 @@
 import java.time.LocalDate;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.stream.Collectors;
 
 @NoArgsConstructor(access = AccessLevel.PRIVATE)
@@ -19,10 +20,10 @@ public static Link toLink(String url, User user, LinkResponse.LinkInfo linkInfo)
         return Link.builder()
                 .user(user)
                 .url(url)
-                .title(linkInfo.getTitle())
-                .type(linkInfo.getType())
-                .contents(linkInfo.getContents())
-                .imageUrl(linkInfo.getImageUrl())
+                .title(Optional.ofNullable(linkInfo.getTitle()).orElse(""))
+                .type(Optional.ofNullable(linkInfo.getType()).orElse(""))
+                .contents(Optional.ofNullable(linkInfo.getContents()).orElse(""))
+                .imageUrl(Optional.ofNullable(linkInfo.getImageUrl()).orElse(""))
                 .build();
     }
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,6 +4,7 @@ on: @@
       push:
         branches:
           - dev
+          - feat/76
       workflow_dispatch:
     jobs:
@@ Expand Down @@