diff --git a/pyproject.toml b/pyproject.toml index 2c8ddd7..a3af7ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" [project] name = "socketsecurity" -version = "2.1.3" +version = "2.1.9" requires-python = ">= 3.10" license = {"file" = "LICENSE"} dependencies = [ diff --git a/socketsecurity/__init__.py b/socketsecurity/__init__.py index a9cb4f2..09ba8ba 100644 --- a/socketsecurity/__init__.py +++ b/socketsecurity/__init__.py @@ -1,2 +1,2 @@ __author__ = 'socket.dev' -__version__ = '2.1.3' +__version__ = '2.1.9' diff --git a/socketsecurity/core/__init__.py b/socketsecurity/core/__init__.py index 7245b4d..9ee8c01 100644 --- a/socketsecurity/core/__init__.py +++ b/socketsecurity/core/__init__.py @@ -133,25 +133,40 @@ def create_sbom_output(self, diff: Diff) -> dict: @staticmethod def expand_brace_pattern(pattern: str) -> List[str]: """ - Expands brace expressions (e.g., {a,b,c}) into separate patterns. - """ - brace_regex = re.compile(r"\{([^{}]+)\}") - - # Expand all brace groups - expanded_patterns = [pattern] - while any("{" in p for p in expanded_patterns): - new_patterns = [] - for pat in expanded_patterns: - match = brace_regex.search(pat) - if match: - options = match.group(1).split(",") # Extract values inside {} - prefix, suffix = pat[:match.start()], pat[match.end():] - new_patterns.extend([prefix + opt + suffix for opt in options]) - else: - new_patterns.append(pat) - expanded_patterns = new_patterns - - return expanded_patterns + Recursively expands brace expressions (e.g., {a,b,c}) into separate patterns, supporting nested braces. + """ + def recursive_expand(pat: str) -> List[str]: + stack = [] + for i, c in enumerate(pat): + if c == '{': + stack.append(i) + elif c == '}' and stack: + start = stack.pop() + if not stack: + # Found the outermost pair + before = pat[:start] + after = pat[i+1:] + inner = pat[start+1:i] + # Split on commas not inside nested braces + options = [] + depth = 0 + last = 0 + for j, ch in enumerate(inner): + if ch == '{': + depth += 1 + elif ch == '}': + depth -= 1 + elif ch == ',' and depth == 0: + options.append(inner[last:j]) + last = j+1 + options.append(inner[last:]) + results = [] + for opt in options: + expanded = before + opt + after + results.extend(recursive_expand(expanded)) + return results + return [pat] + return recursive_expand(pattern) @staticmethod def is_excluded(file_path: str, excluded_dirs: Set[str]) -> bool: @@ -176,13 +191,7 @@ def find_files(self, path: str) -> List[str]: files: Set[str] = set() # Get supported patterns from the API - try: - patterns = self.get_supported_patterns() - except Exception as e: - log.error(f"Error getting supported patterns from API: {e}") - log.warning("Falling back to local patterns") - from .utils import socket_globs as fallback_patterns - patterns = fallback_patterns + patterns = self.get_supported_patterns() for ecosystem in patterns: if ecosystem in self.config.excluded_ecosystems: @@ -642,7 +651,6 @@ def create_new_diff( try: new_scan_start = time.time() new_full_scan = self.create_full_scan(files_for_sending, params) - new_full_scan.sbom_artifacts = self.get_sbom_data(new_full_scan.id) new_scan_end = time.time() log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}") except APIFailure as e: diff --git a/socketsecurity/core/classes.py b/socketsecurity/core/classes.py index aefb0ab..8357458 100644 --- a/socketsecurity/core/classes.py +++ b/socketsecurity/core/classes.py @@ -97,7 +97,7 @@ class AlertCounts(TypedDict): low: int @dataclass(kw_only=True) -class Package(SocketArtifactLink): +class Package(): """ Represents a package detected in a Socket Security scan. @@ -106,16 +106,23 @@ class Package(SocketArtifactLink): """ # Common properties from both artifact types - id: str + type: str name: str version: str - type: str + release: str + diffType: str + id: str + author: List[str] = field(default_factory=list) score: SocketScore alerts: List[SocketAlert] - author: List[str] = field(default_factory=list) size: Optional[int] = None license: Optional[str] = None namespace: Optional[str] = None + topLevelAncestors: Optional[List[str]] = None + direct: Optional[bool] = False + manifestFiles: Optional[List[SocketManifestReference]] = None + dependencies: Optional[List[str]] = None + artifact: Optional[SocketArtifactLink] = None # Package-specific fields license_text: str = "" @@ -203,7 +210,9 @@ def from_diff_artifact(cls, data: dict) -> "Package": manifestFiles=ref.get("manifestFiles", []), dependencies=ref.get("dependencies"), artifact=ref.get("artifact"), - namespace=data.get('namespace', None) + namespace=data.get('namespace', None), + release=ref.get("release", None), + diffType=ref.get("diffType", None), ) class Issue: diff --git a/socketsecurity/core/helper/__init__.py b/socketsecurity/core/helper/__init__.py new file mode 100644 index 0000000..f10cb6e --- /dev/null +++ b/socketsecurity/core/helper/__init__.py @@ -0,0 +1,119 @@ +import markdown +from bs4 import BeautifulSoup, NavigableString, Tag +import string + + +class Helper: + @staticmethod + def parse_gfm_section(html_content): + """ + Parse a GitHub-Flavored Markdown section containing a table and surrounding content. + Returns a dict with "before_html", "columns", "rows_html", and "after_html". + """ + html = markdown.markdown(html_content, extensions=['extra']) + soup = BeautifulSoup(html, "html.parser") + + table = soup.find('table') + if not table: + # If no table, treat entire content as before_html + return {"before_html": html, "columns": [], "rows_html": [], "after_html": ''} + + # Collect HTML before the table + before_parts = [str(elem) for elem in table.find_previous_siblings()] + before_html = ''.join(reversed(before_parts)) + + # Collect HTML after the table + after_parts = [str(elem) for elem in table.find_next_siblings()] + after_html = ''.join(after_parts) + + # Extract table headers + headers = [th.get_text(strip=True) for th in table.find_all('th')] + + # Extract table rows (skip header) + rows_html = [] + for tr in table.find_all('tr')[1:]: + cells = [str(td) for td in tr.find_all('td')] + rows_html.append(cells) + + return { + "before_html": before_html, + "columns": headers, + "rows_html": rows_html, + "after_html": after_html + } + + @staticmethod + def parse_cell(html_td): + """Convert a table cell HTML into plain text or a dict for links/images.""" + soup = BeautifulSoup(html_td, "html.parser") + a = soup.find('a') + if a: + cell = {"url": a.get('href', '')} + img = a.find('img') + if img: + cell.update({ + "img_src": img.get('src', ''), + "title": img.get('title', ''), + "link_text": a.get_text(strip=True) + }) + else: + cell["link_text"] = a.get_text(strip=True) + return cell + return soup.get_text(strip=True) + + @staticmethod + def parse_html_parts(html_fragment): + """ + Convert an HTML fragment into a list of parts. + Each part is either: + - {"text": "..."} + - {"link": "url", "text": "..."} + - {"img_src": "url", "alt": "...", "title": "..."} + """ + soup = BeautifulSoup(html_fragment, 'html.parser') + parts = [] + + def handle_element(elem): + if isinstance(elem, NavigableString): + text = str(elem).strip() + if text and not all(ch in string.punctuation for ch in text): + parts.append({"text": text}) + elif isinstance(elem, Tag): + if elem.name == 'a': + href = elem.get('href', '') + txt = elem.get_text(strip=True) + parts.append({"link": href, "text": txt}) + elif elem.name == 'img': + parts.append({ + "img_src": elem.get('src', ''), + "alt": elem.get('alt', ''), + "title": elem.get('title', '') + }) + else: + # Recurse into children for nested tags + for child in elem.children: + handle_element(child) + + for element in soup.contents: + handle_element(element) + + return parts + + @staticmethod + def section_to_json(section_result): + """ + Convert a parsed section into structured JSON. + Returns {"before": [...], "table": [...], "after": [...]}. + """ + # Build JSON rows for the table + table_rows = [] + cols = section_result.get('columns', []) + for row_html in section_result.get('rows_html', []): + cells = [Helper.parse_cell(cell_html) for cell_html in row_html] + table_rows.append(dict(zip(cols, cells))) + + return { + "before": Helper.parse_html_parts(section_result.get('before_html', '')), + "table": table_rows, + "after": Helper.parse_html_parts(section_result.get('after_html', '')) + } \ No newline at end of file diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index b86b37f..0b5fc62 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -292,7 +292,8 @@ def create_security_comment_json(diff: Diff) -> dict: output = { "scan_failed": scan_failed, "new_alerts": [], - "full_scan_id": diff.id + "full_scan_id": diff.id, + "diff_url": diff.diff_url } for alert in diff.new_alerts: alert: Issue diff --git a/socketsecurity/output.py b/socketsecurity/output.py index 2b523d5..a1f8647 100644 --- a/socketsecurity/output.py +++ b/socketsecurity/output.py @@ -66,7 +66,8 @@ def output_console_comments(self, diff_report: Diff, sbom_file_name: Optional[st console_security_comment = Messages.create_console_security_alert_table(diff_report) self.logger.info("Security issues detected by Socket Security:") - self.logger.info(console_security_comment) + self.logger.info(f"Diff Url: {diff_report.diff_url}") + self.logger.info(f"\n{console_security_comment}") def output_console_json(self, diff_report: Diff, sbom_file_name: Optional[str] = None) -> None: """Outputs JSON formatted results""" diff --git a/socketsecurity/socketcli.py b/socketsecurity/socketcli.py index e6594ad..283f3cd 100644 --- a/socketsecurity/socketcli.py +++ b/socketsecurity/socketcli.py @@ -235,7 +235,7 @@ def main_code(): log.debug("Updated security comment with no new alerts") # FIXME: diff.new_packages is never populated, neither is removed_packages - if (len(diff.new_packages) == 0 and len(diff.removed_packages) == 0) or config.disable_overview: + if (len(diff.new_packages) == 0) or config.disable_overview: if not update_old_overview_comment: new_overview_comment = False log.debug("No new/removed packages or Dependency Overview comment disabled") @@ -243,7 +243,6 @@ def main_code(): log.debug("Updated overview comment with no dependencies") log.debug(f"Adding comments for {config.scm}") - scm.add_socket_comments( security_comment, overview_comment,