jjjake · polymood · Sep 23, 2025 · Sep 23, 2025
diff --git a/internetarchive/files.py b/internetarchive/files.py
@@ -235,6 +235,8 @@ def download(  # noqa: C901,PLR0911,PLR0912,PLR0915
         self.item.session.mount_http_adapter(max_retries=retries)
         file_path = file_path or self.name
 
+        file_path = os.path.basename(file_path)
+
         # Critical security check: Sanitize only the filename portion of file_path to
         # prevent invalid characters and potential directory traversal issues.
         # We use `utils.sanitize_filepath` instead of `utils.sanitize_filename` because:
@@ -253,15 +255,14 @@ def download(  # noqa: C901,PLR0911,PLR0912,PLR0915
                 raise OSError(f'{destdir} is not a directory!')
             file_path = os.path.join(destdir, file_path)
 
+
         # Critical security check: Prevent directory traversal attacks by ensuring
         # the download path doesn't escape the target directory using path resolution
         # and relative path validation. This protects against malicious filenames
         # containing ../ sequences or other path manipulation attempts.
         try:
-            # Resolve both paths to handle symlinks and absolute paths
-            target_path = Path(file_path).resolve()
             base_dir = Path(destdir).resolve() if destdir else Path.cwd().resolve()
-            # Ensure the target path is relative to base directory
+            target_path = (base_dir / file_path).resolve()
             target_path.relative_to(base_dir)
         except ValueError:
             raise ValueError(f"Download path {file_path} is outside target directory {base_dir}")

diff --git a/internetarchive/utils.py b/internetarchive/utils.py
@@ -480,8 +480,9 @@ def sanitize_filepath(filepath: str, avoid_colon: bool = False) -> str:
     Sanitizes only the filename part of a full file path, leaving the directory path intact.
 
     This is useful when you need to ensure the filename is safe for filesystem use
-    without modifying the directory structure. Typically used before creating files
-    or directories to prevent invalid filename characters.
+    without modifying the directory structure. This implementation robustly handles both
+    simple filenames and full paths by avoiding `os.path` functions that can have
+    unreliable, platform-specific behavior with certain characters (e.g., colons on Windows).
 
     Args:
         filepath (str): The full file path to sanitize.
@@ -491,10 +492,29 @@ def sanitize_filepath(filepath: str, avoid_colon: bool = False) -> str:
     Returns:
         str: The sanitized file path with the filename portion percent-encoded as needed.
     """
-    parent_dir = os.path.dirname(filepath)
-    filename = os.path.basename(filepath)
-    sanitized_filename = sanitize_filename(filename, avoid_colon)
-    return os.path.join(parent_dir, sanitized_filename)
+    # First, handle the case where the input is a simple filename (no directory path).
+    # This avoids issues with os.path functions misinterpreting characters like colons
+    # in filenames as drive separators on Windows.
+    if os.sep not in filepath and '/' not in filepath:
+        return sanitize_filename(filepath, avoid_colon)
+
+    # For full paths, reliably separate the directory from the filename using
+    # string splitting, which is safer than relying on os.path functions.
+    # We prefer the POSIX separator for cross-platform consistency if present.
+    separator = '/' if '/' in filepath else os.sep
+    parts = filepath.rsplit(separator, 1)
+
+    if len(parts) == 2:
+        # The path was successfully split into a directory and a filename.
+        parent_dir, filename = parts
+        sanitized_filename = sanitize_filename(filename, avoid_colon)
+
+        # Rejoin the directory with the now-sanitized filename.
+        return parent_dir + separator + sanitized_filename
+    else:
+        # This is a fallback for unusual cases (e.g., an absolute path root '/').
+        # We treat the whole string as a filename to ensure it's always sanitized.
+        return sanitize_filename(filepath, avoid_colon)
 
 
 def sanitize_filename(name: str, avoid_colon: bool = False) -> str:

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -173,15 +173,36 @@
 
 
 def test_sanitize_filepath():
-    # Test with colon encoding
-    result = internetarchive.utils.sanitize_filepath('/path/to/file:name.txt', True)
-    assert result == '/path/to/file%3Aname.txt'
+    """
+    Tests the sanitize_filepath function in both Windows and POSIX environments.
+    """
+    # --- Test POSIX-like behavior (e.g., Linux, macOS) ---
+    with patch('internetarchive.utils.is_windows', return_value=False):
+        # On POSIX, colon is only encoded when avoid_colon=True
+        posix_path = '/path/to/file:name.txt'
 
-    # Test without colon encoding
-    result = internetarchive.utils.sanitize_filepath('/path/to/file:name.txt', False)
-    assert result == '/path/to/file:name.txt'  # Colon not encoded on POSIX by default
+        # Test with colon encoding enabled
+        result_avoid_colon = internetarchive.utils.sanitize_filepath(posix_path, avoid_colon=True)
+        assert result_avoid_colon == '/path/to/file%3Aname.txt'
+
+        # Test with colon encoding disabled (default)
+        result_no_avoid_colon = internetarchive.utils.sanitize_filepath(posix_path, avoid_colon=False)
+        assert result_no_avoid_colon == '/path/to/file:name.txt'
 
-    # Test Windows path (mocked)
+        # Test another invalid char that should always be sanitized
+        result_other_char = internetarchive.utils.sanitize_filepath('/path/to/file/name.txt')
+        assert result_other_char == '/path/to/file%2Fname.txt'
+
+    # --- Test Windows-specific behavior ---
     with patch('internetarchive.utils.is_windows', return_value=True):
-        result = internetarchive.utils.sanitize_filepath('/path/to/con.txt')
-        assert result == '/path/to/con.txt'  # Reserved name sanitized
+        # On Windows, colon is ALWAYS an invalid character and must be encoded,
+        # regardless of the 'avoid_colon' flag.
+        win_path = 'C:\\path\\to\\file:name.txt'
+
+        # Test with avoid_colon=True (should encode the colon)
+        result_avoid_colon_win = internetarchive.utils.sanitize_filepath(win_path, avoid_colon=True)
+        assert result_avoid_colon_win == 'C:\\path\\to\\file%3Aname.txt'
+
+        # Test with avoid_colon=False (should STILL encode the colon)
+        result_no_avoid_colon_win = internetarchive.utils.sanitize_filepath(win_path, avoid_colon=False)
+        assert result_no_avoid_colon_win == 'C:\\path\\to\\file%3Aname.txt'