diff --git a/go.mod b/go.mod
index 6ed6d60f4..0edb6cfad 100644
--- a/go.mod
+++ b/go.mod
@@ -45,6 +45,8 @@ require (
 	golang.org/x/crypto v0.46.0
 	golang.org/x/sync v0.19.0
 	golang.org/x/sys v0.39.0
+	golang.org/x/text v0.32.0
+	golang.org/x/time v0.0.0-20220922220347-f3bd1da661af
 	gopkg.in/ini.v1 v1.67.0
 	gopkg.in/yaml.v2 v2.4.0
 	gopkg.in/yaml.v3 v3.0.1
@@ -131,8 +133,6 @@ require (
 	golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 // indirect
 	golang.org/x/net v0.47.0 // indirect
 	golang.org/x/term v0.38.0 // indirect
-	golang.org/x/text v0.32.0 // indirect
-	golang.org/x/time v0.0.0-20220922220347-f3bd1da661af // indirect
 	golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
 	google.golang.org/protobuf v1.36.5 // indirect
 	gotest.tools/v3 v3.0.2 // indirect
diff --git a/server/filesystem/compress.go b/server/filesystem/compress.go
index f2775cb31..ff2c16d23 100644
--- a/server/filesystem/compress.go
+++ b/server/filesystem/compress.go
@@ -10,10 +10,12 @@ import (
 	"strings"
 	"sync/atomic"
 	"time"
+	"unicode/utf8"
 
 	"emperror.dev/errors"
 	"github.com/klauspost/compress/zip"
 	"github.com/mholt/archives"
+	"golang.org/x/text/encoding/simplifiedchinese"
 
 	"github.com/pterodactyl/wings/internal/ufs"
 	"github.com/pterodactyl/wings/server/filesystem/archiverext"
@@ -190,6 +192,32 @@ type extractStreamOptions struct {
 	Reader io.Reader
 }
 
+// decodeFilename returns a UTF-8 form of a filename read from an archive.
+//
+// Many Windows applications in China create ZIP files with GBK-encoded
+// filenames instead of UTF-8, so the following heuristic is applied:
+//   - If the filename is valid UTF-8, return it as-is
+//   - If the filename is not valid UTF-8, attempt to decode it as GBK
+//   - If the bytes are not valid GBK either, return the original filename
+//
+// The GBK decoder does not report malformed input through its error value;
+// it substitutes U+FFFD for every byte it cannot transcode. The decoded
+// result is therefore also checked for that rune, so an undecodable name is
+// never silently rewritten into replacement characters.
+func decodeFilename(filename string) string {
+	if utf8.ValidString(filename) {
+		return filename
+	}
+
+	decoded, err := simplifiedchinese.GBK.NewDecoder().String(filename)
+	if err != nil || strings.ContainsRune(decoded, utf8.RuneError) {
+		// Either the transform failed outright or at least one byte was not
+		// valid GBK: keep the name exactly as stored in the archive.
+		return filename
+	}
+	return decoded
+}
+
 func (fs *Filesystem) extractStream(ctx context.Context, opts extractStreamOptions) error {
 	// See if it's a compressed archive, such as TAR or a ZIP
 	ex, ok := opts.Format.(archives.Extractor)
@@ -261,7 +289,9 @@ func (fs *Filesystem) extractStream(ctx context.Context, opts extractStreamOptio
 		if f.IsDir() {
 			return nil
 		}
-		p := filepath.Join(opts.Directory, f.NameInArchive)
+		// Decode the filename, converting from GBK to UTF-8 if necessary
+		decodedName := decodeFilename(f.NameInArchive)
+		p := filepath.Join(opts.Directory, decodedName)
 		// If it is ignored, just don't do anything with the file and skip over it.
 		if err := fs.IsIgnored(p); err != nil {
 			return nil
diff --git a/server/filesystem/compress_test.go b/server/filesystem/compress_test.go
index 80cf70800..fd718c14f 100644
--- a/server/filesystem/compress_test.go
+++ b/server/filesystem/compress_test.go
@@ -4,8 +4,10 @@ import (
 	"context"
 	"os"
 	"testing"
+	"unicode/utf8"
 
 	. "github.com/franela/goblin"
+	"golang.org/x/text/encoding/simplifiedchinese"
 )
 
 // Given an archive named test.{ext}, with the following file structure:
@@ -52,3 +54,71 @@ func TestFilesystem_DecompressFile(t *testing.T) {
 		})
 	})
 }
+
+// Test for GBK-encoded filenames in archives
+func TestFilesystem_DecompressFile_GBK(t *testing.T) {
+	g := Goblin(t)
+	fs, rfs := NewFs()
+
+	g.Describe("Decompress GBK-encoded filenames", func() {
+		g.It("can decompress a zip with GBK-encoded filenames", func() {
+			// copy the file to the new FS
+			c, err := os.ReadFile("./testdata/test-gbk.zip")
+			g.Assert(err).IsNil()
+			err = rfs.CreateServerFile("./test-gbk.zip", c)
+			g.Assert(err).IsNil()
+
+			// decompress
+			err = fs.DecompressFile(context.Background(), "/", "test-gbk.zip")
+			g.Assert(err).IsNil()
+
+			// make sure the file was extracted with proper UTF-8 filename
+			_, err = rfs.StatServerFile("测试文件夹/测试文档.txt")
+			g.Assert(err).IsNil()
+		})
+
+		g.AfterEach(func() {
+			_ = fs.TruncateRootDirectory()
+		})
+	})
+}
+
+// Test the decodeFilename helper function
+func TestDecodeFilename(t *testing.T) {
+	g := Goblin(t)
+
+	g.Describe("decodeFilename", func() {
+		g.It("should pass through valid UTF-8 strings", func() {
+			input := "test/测试文件.txt"
+			output := decodeFilename(input)
+			g.Assert(output).Equal(input)
+		})
+
+		g.It("should pass through ASCII strings", func() {
+			input := "test/file.txt"
+			output := decodeFilename(input)
+			g.Assert(output).Equal(input)
+		})
+
+		g.It("should convert GBK to UTF-8", func() {
+			// Create a GBK-encoded string
+			utf8String := "测试文件.txt"
+			gbkString, err := simplifiedchinese.GBK.NewEncoder().String(utf8String)
+			g.Assert(err).IsNil()
+
+			// Verify it's not valid UTF-8
+			g.Assert(utf8.ValidString(gbkString)).IsFalse()
+
+			// Decode and verify it matches original UTF-8
+			output := decodeFilename(gbkString)
+			g.Assert(output).Equal(utf8String)
+		})
+
+		g.It("should keep the original name when it is neither UTF-8 nor GBK", func() {
+			// 0xFF is invalid in UTF-8 and is not a valid GBK lead byte
+			input := "file\xff.txt"
+			output := decodeFilename(input)
+			g.Assert(output).Equal(input)
+		})
+	})
+}
diff --git a/server/filesystem/testdata/test-gbk.zip b/server/filesystem/testdata/test-gbk.zip
new file mode 100644
index 000000000..6818c0d4f
Binary files /dev/null and b/server/filesystem/testdata/test-gbk.zip differ