From 38172b421fd7ac81f3e2e4728ea4cbfaa85fa644 Mon Sep 17 00:00:00 2001 From: multiflexi Date: Tue, 29 Oct 2024 17:03:58 +0100 Subject: [PATCH] fix bug in web collector --- src/collectors/collectors/web_collector.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/collectors/collectors/web_collector.py b/src/collectors/collectors/web_collector.py index d2e81389..4e7a5fb8 100644 --- a/src/collectors/collectors/web_collector.py +++ b/src/collectors/collectors/web_collector.py @@ -788,8 +788,12 @@ def __process_title_page_articles(self, browser, title_page_handle, index_url): news_item = self.__process_article_page(index_url, browser) if news_item: logger.debug(f"{self.collector_source} ... Title : {news_item.title}") - logger.debug(f"{self.collector_source} ... Review : {news_item.review.replace('\r', '').replace('\n', ' ').strip()[:100]}") - logger.debug(f"{self.collector_source} ... Content : {news_item.content.replace('\r', '').replace('\n', ' ').strip()[:100]}") + logger.debug( + f"{self.collector_source} ... Review : {news_item.review.replace('\r', '').replace('\n', ' ').strip()[:100]}" + ) + logger.debug( + f"{self.collector_source} ... Content : {news_item.content.replace('\r', '').replace('\n', ' ').strip()[:100]}" + ) logger.debug(f"{self.collector_source} ... Published: {news_item.published}") self.news_items.append(news_item) else: @@ -871,6 +875,6 @@ def __process_article_page(self, index_url, browser): key = "Additional_ID" binary_mime_type = "" binary_value = "" - attribute = NewsItemAttribute(uuid.uuid4(), key, value, binary_mime_type, binary_value) + attribute = NewsItemAttribute(key, value, binary_mime_type, binary_value) news_item.attributes.append(attribute) return news_item