From 199798ee183ca0f2fc68515a007164fa91e91651 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Feb 2026 13:38:19 +0000 Subject: [PATCH 1/5] Initial plan From a368659fea8e24abd0a63e4d31796178a5458dbc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Feb 2026 13:50:36 +0000 Subject: [PATCH 2/5] Fix Chinese filename handling in ZIP archives - Create ZipFS wrapper in archiverext package to handle GBK-encoded filenames - Automatically decode GBK to UTF-8 when walking ZIP archives - Update archiverFileSystem to use ZipFS wrapper for ZIP files - Add test for SpaceAvailableForDecompression with GBK filenames Co-authored-by: vlssu <43847794+vlssu@users.noreply.github.com> --- server/filesystem/archiverext/zip.go | 210 +++++++++++++++++++++++++++ server/filesystem/compress.go | 10 +- server/filesystem/compress_test.go | 12 ++ 3 files changed, 226 insertions(+), 6 deletions(-) create mode 100644 server/filesystem/archiverext/zip.go diff --git a/server/filesystem/archiverext/zip.go b/server/filesystem/archiverext/zip.go new file mode 100644 index 000000000..f77718dd6 --- /dev/null +++ b/server/filesystem/archiverext/zip.go @@ -0,0 +1,210 @@ +package archiverext + +import ( + "io" + "io/fs" + "time" + "unicode/utf8" + + "github.com/klauspost/compress/zip" + "golang.org/x/text/encoding/simplifiedchinese" +) + +// ZipFS is a wrapper around zip.Reader that implements fs.FS with +// automatic filename decoding. It handles GBK-encoded filenames +// (common in Chinese Windows systems) by converting them to UTF-8. +type ZipFS struct { + reader *zip.Reader +} + +// NewZipFS creates a new ZipFS from a ReaderAt and size. +func NewZipFS(r io.ReaderAt, size int64) (*ZipFS, error) { + zr, err := zip.NewReader(r, size) + if err != nil { + return nil, err + } + return &ZipFS{reader: zr}, nil +} + +// Open opens the named file from the ZIP archive. 
+// It automatically decodes GBK-encoded filenames to UTF-8. +func (z *ZipFS) Open(name string) (fs.File, error) { + // Try to open with the name as-is first (for UTF-8 filenames) + var targetFile *zip.File + for _, f := range z.reader.File { + if f.Name == name { + targetFile = f + break + } + } + + // If not found, try to find a file with a matching decoded name + if targetFile == nil { + for _, f := range z.reader.File { + decodedName := decodeZipFilename(f.Name) + if decodedName == name { + targetFile = f + break + } + } + } + + if targetFile == nil { + return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist} + } + + rc, err := targetFile.Open() + if err != nil { + return nil, err + } + + return &zipFile{ + ReadCloser: rc, + file: targetFile, + }, nil +} + +// ReadDir reads the directory named by name from the ZIP archive. +// It returns directory entries with decoded filenames. +func (z *ZipFS) ReadDir(name string) ([]fs.DirEntry, error) { + // For zip files, we need to build the directory listing + // by examining all files and filtering by prefix + var entries []fs.DirEntry + + // Normalize the directory name + if name == "." 
{ + name = "" + } else if name != "" && name[len(name)-1] != '/' { + name = name + "/" + } + + seen := make(map[string]bool) + for _, f := range z.reader.File { + decodedName := decodeZipFilename(f.Name) + + // Skip files not in this directory + if name != "" && !hasPrefix(decodedName, name) { + continue + } + + // Get the relative path within this directory + relPath := decodedName + if name != "" { + relPath = decodedName[len(name):] + } + + // Skip if this is the directory itself + if relPath == "" { + continue + } + + // Extract the first path component + var entryName string + idx := indexOf(relPath, '/') + if idx >= 0 { + entryName = relPath[:idx] + } else { + entryName = relPath + } + + // Skip duplicates + if seen[entryName] { + continue + } + seen[entryName] = true + + // Determine if this entry is a directory + isDir := idx >= 0 && idx < len(relPath)-1 + + entries = append(entries, &zipDirEntry{ + name: entryName, + isDir: isDir, + info: &f.FileHeader, + }) + } + + return entries, nil +} + +// Stat returns file information for the named file from the ZIP archive. +func (z *ZipFS) Stat(name string) (fs.FileInfo, error) { + if name == "." { + // Return info for root directory + return &zipRootInfo{}, nil + } + + // Try to find the file with decoded name + for _, f := range z.reader.File { + decodedName := decodeZipFilename(f.Name) + if decodedName == name || decodedName == name+"/" { + return f.FileInfo(), nil + } + } + + return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrNotExist} +} + +// decodeZipFilename decodes a filename from a ZIP archive, automatically +// detecting and converting GBK-encoded filenames to UTF-8. 
+func decodeZipFilename(filename string) string { + // Check if it's already valid UTF-8 + if utf8.ValidString(filename) { + return filename + } + + // Not valid UTF-8, try to decode as GBK + decoded, err := simplifiedchinese.GBK.NewDecoder().String(filename) + if err != nil { + // GBK decoding failed, return original + return filename + } + + // Successfully decoded from GBK + return decoded +} + +// Helper functions +func hasPrefix(s, prefix string) bool { + return len(s) >= len(prefix) && s[:len(prefix)] == prefix +} + +func indexOf(s string, c byte) int { + for i := 0; i < len(s); i++ { + if s[i] == c { + return i + } + } + return -1 +} + +// zipFile wraps an io.ReadCloser from a zip.File and implements fs.File +type zipFile struct { + io.ReadCloser + file *zip.File +} + +func (zf *zipFile) Stat() (fs.FileInfo, error) { + return zf.file.FileInfo(), nil +} + +// zipDirEntry implements fs.DirEntry for ZIP file entries +type zipDirEntry struct { + name string + isDir bool + info *zip.FileHeader +} + +func (e *zipDirEntry) Name() string { return e.name } +func (e *zipDirEntry) IsDir() bool { return e.isDir } +func (e *zipDirEntry) Type() fs.FileMode { return e.info.Mode().Type() } +func (e *zipDirEntry) Info() (fs.FileInfo, error) { return e.info.FileInfo(), nil } + +// zipRootInfo implements fs.FileInfo for the root directory +type zipRootInfo struct{} + +func (i *zipRootInfo) Name() string { return "." 
} +func (i *zipRootInfo) Size() int64 { return 0 } +func (i *zipRootInfo) Mode() fs.FileMode { return fs.ModeDir | 0755 } +func (i *zipRootInfo) ModTime() time.Time { return time.Time{} } +func (i *zipRootInfo) IsDir() bool { return true } +func (i *zipRootInfo) Sys() interface{} { return nil } diff --git a/server/filesystem/compress.go b/server/filesystem/compress.go index 2db7ac326..a31b51865 100644 --- a/server/filesystem/compress.go +++ b/server/filesystem/compress.go @@ -13,7 +13,6 @@ import ( "unicode/utf8" "emperror.dev/errors" - "github.com/klauspost/compress/zip" "github.com/mholt/archives" "golang.org/x/text/encoding/simplifiedchinese" @@ -81,11 +80,10 @@ func (fs *Filesystem) archiverFileSystem(ctx context.Context, p string) (iofs.FS if format != nil { switch ff := format.(type) { case archives.Zip: - // zip.Reader is more performant than ArchiveFS, because zip.Reader caches content information - // and zip.Reader can open several content files concurrently because of io.ReaderAt requirement - // while ArchiveFS can't. - // zip.Reader doesn't suffer from issue #330 and #310 according to local test (but they should be fixed anyway) - return zip.NewReader(f, info.Size()) + // Use our custom ZipFS wrapper that handles GBK-encoded filenames + // This is more performant than ArchiveFS, because it caches content information + // and can open several content files concurrently because of io.ReaderAt requirement. 
+ return archiverext.NewZipFS(f, info.Size()) case archives.Extraction: return &archives.ArchiveFS{Stream: io.NewSectionReader(f, 0, info.Size()), Format: ff, Context: ctx}, nil case archives.Compression: diff --git a/server/filesystem/compress_test.go b/server/filesystem/compress_test.go index fd718c14f..f24004a3d 100644 --- a/server/filesystem/compress_test.go +++ b/server/filesystem/compress_test.go @@ -77,6 +77,18 @@ func TestFilesystem_DecompressFile_GBK(t *testing.T) { g.Assert(err).IsNil() }) + g.It("can check space for a zip with GBK-encoded filenames", func() { + // copy the file to the new FS + c, err := os.ReadFile("./testdata/test-gbk.zip") + g.Assert(err).IsNil() + err = rfs.CreateServerFile("./test-gbk.zip", c) + g.Assert(err).IsNil() + + // check space availability + err = fs.SpaceAvailableForDecompression(context.Background(), "/", "test-gbk.zip") + g.Assert(err).IsNil() + }) + g.AfterEach(func() { _ = fs.TruncateRootDirectory() }) From 55899b39485abafd7837e6faafe2379c4d3590d7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Feb 2026 13:52:09 +0000 Subject: [PATCH 3/5] Add proper file handle cleanup for archive space checks - Add Close() method to ZipFS to properly close underlying file - Update SpaceAvailableForDecompression to close filesystem after use - Prevents file handle leaks that could cause "unexpected EOF" errors Co-authored-by: vlssu <43847794+vlssu@users.noreply.github.com> --- server/filesystem/archiverext/zip.go | 18 +++++++++++++++++- server/filesystem/compress.go | 5 +++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/server/filesystem/archiverext/zip.go b/server/filesystem/archiverext/zip.go index f77718dd6..679b72af3 100644 --- a/server/filesystem/archiverext/zip.go +++ b/server/filesystem/archiverext/zip.go @@ -15,6 +15,7 @@ import ( // (common in Chinese Windows systems) by converting them to UTF-8. 
type ZipFS struct { reader *zip.Reader + file io.Closer } // NewZipFS creates a new ZipFS from a ReaderAt and size. @@ -23,7 +24,22 @@ func NewZipFS(r io.ReaderAt, size int64) (*ZipFS, error) { if err != nil { return nil, err } - return &ZipFS{reader: zr}, nil + + // Keep a reference to the underlying file if it's a Closer + var closer io.Closer + if c, ok := r.(io.Closer); ok { + closer = c + } + + return &ZipFS{reader: zr, file: closer}, nil +} + +// Close closes the underlying file if it's a Closer. +func (z *ZipFS) Close() error { + if z.file != nil { + return z.file.Close() + } + return nil } // Open opens the named file from the ZIP archive. diff --git a/server/filesystem/compress.go b/server/filesystem/compress.go index a31b51865..677026cf8 100644 --- a/server/filesystem/compress.go +++ b/server/filesystem/compress.go @@ -112,6 +112,11 @@ func (fs *Filesystem) SpaceAvailableForDecompression(ctx context.Context, dir st } return err } + + // Close the filesystem after we're done to release file handles + if closer, ok := fsys.(io.Closer); ok { + defer closer.Close() + } // Create a context with timeout to prevent long delays on large archives timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Second) From 3266bc9215bdbf21619c376f6e65bcdab97bed7d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Feb 2026 13:53:12 +0000 Subject: [PATCH 4/5] Format code with gofmt Co-authored-by: vlssu <43847794+vlssu@users.noreply.github.com> --- server/filesystem/archiverext/zip.go | 8 ++++---- server/filesystem/compress.go | 2 +- server/filesystem/compress_test.go | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/server/filesystem/archiverext/zip.go b/server/filesystem/archiverext/zip.go index 679b72af3..22154fdbe 100644 --- a/server/filesystem/archiverext/zip.go +++ b/server/filesystem/archiverext/zip.go @@ -24,13 +24,13 @@ func NewZipFS(r io.ReaderAt, size int64) (*ZipFS, error) { if err 
!= nil { return nil, err } - + // Keep a reference to the underlying file if it's a Closer var closer io.Closer if c, ok := r.(io.Closer); ok { closer = c } - + return &ZipFS{reader: zr, file: closer}, nil } @@ -86,7 +86,7 @@ func (z *ZipFS) ReadDir(name string) ([]fs.DirEntry, error) { // For zip files, we need to build the directory listing // by examining all files and filtering by prefix var entries []fs.DirEntry - + // Normalize the directory name if name == "." { name = "" @@ -97,7 +97,7 @@ func (z *ZipFS) ReadDir(name string) ([]fs.DirEntry, error) { seen := make(map[string]bool) for _, f := range z.reader.File { decodedName := decodeZipFilename(f.Name) - + // Skip files not in this directory if name != "" && !hasPrefix(decodedName, name) { continue diff --git a/server/filesystem/compress.go b/server/filesystem/compress.go index 677026cf8..ebec52698 100644 --- a/server/filesystem/compress.go +++ b/server/filesystem/compress.go @@ -112,7 +112,7 @@ func (fs *Filesystem) SpaceAvailableForDecompression(ctx context.Context, dir st } return err } - + // Close the filesystem after we're done to release file handles if closer, ok := fsys.(io.Closer); ok { defer closer.Close() diff --git a/server/filesystem/compress_test.go b/server/filesystem/compress_test.go index f24004a3d..8ad6c49a2 100644 --- a/server/filesystem/compress_test.go +++ b/server/filesystem/compress_test.go @@ -117,10 +117,10 @@ func TestDecodeFilename(t *testing.T) { utf8String := "测试文件.txt" gbkString, err := simplifiedchinese.GBK.NewEncoder().String(utf8String) g.Assert(err).IsNil() - + // Verify it's not valid UTF-8 g.Assert(utf8.ValidString(gbkString)).IsFalse() - + // Decode and verify it matches original UTF-8 output := decodeFilename(gbkString) g.Assert(output).Equal(utf8String) From 89e880cab8deb2c0df1cd2e702936fa92fbac869 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Feb 2026 13:54:11 +0000 Subject: [PATCH 5/5] Address code 
review feedback - Fix directory detection logic to properly handle trailing slashes - Replace interface{} with its alias any (Go 1.18+) - Improve directory classification using FileInfo().IsDir() Co-authored-by: vlssu <43847794+vlssu@users.noreply.github.com> --- server/filesystem/archiverext/zip.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/filesystem/archiverext/zip.go b/server/filesystem/archiverext/zip.go index 22154fdbe..43b53717e 100644 --- a/server/filesystem/archiverext/zip.go +++ b/server/filesystem/archiverext/zip.go @@ -129,8 +129,9 @@ func (z *ZipFS) ReadDir(name string) ([]fs.DirEntry, error) { } seen[entryName] = true - // Determine if this entry is a directory - isDir := idx >= 0 && idx < len(relPath)-1 + // Determine if this entry is a directory by checking if there's more path after it + // or if the original file is marked as a directory + isDir := idx >= 0 || f.FileInfo().IsDir() entries = append(entries, &zipDirEntry{ name: entryName, @@ -223,4 +224,4 @@ func (i *zipRootInfo) Size() int64 { return 0 } func (i *zipRootInfo) Mode() fs.FileMode { return fs.ModeDir | 0755 } func (i *zipRootInfo) ModTime() time.Time { return time.Time{} } func (i *zipRootInfo) IsDir() bool { return true } -func (i *zipRootInfo) Sys() interface{} { return nil } +func (i *zipRootInfo) Sys() any { return nil }