diff --git a/lib/TPSGI.pm b/lib/TPSGI.pm index c23a3e4..ababb83 100755 --- a/lib/TPSGI.pm +++ b/lib/TPSGI.pm @@ -24,7 +24,6 @@ use CGI::Emulate::PSGI; use Date::Format qw{strftime}; use List::Util(); -use File::Find; use Sys::Hostname(); use Plack::MIME (); use DateTime::Format::HTTP(); @@ -78,6 +77,19 @@ my %extra_types = ( my $ct = 'Content-type'; +# MIME types that are already compressed — gzip would waste CPU and bloat responses +my %incompressible_mime = map { $_ => 1 } qw{ + image/jpeg image/png image/gif image/webp image/avif image/heic + audio/mpeg audio/ogg audio/mp4 audio/webm audio/flac + video/mp4 video/webm video/ogg video/quicktime video/x-msvideo + application/zip application/gzip application/x-bzip2 application/x-xz + application/x-7z-compressed application/zstd + application/pdf application/wasm + application/vnd.openxmlformats-officedocument.wordprocessingml.document + application/vnd.openxmlformats-officedocument.spreadsheetml.sheet + application/vnd.openxmlformats-officedocument.presentationml.presentation +}; + #memoize my $rq; @@ -342,8 +354,8 @@ sub serve { } if $streaming && $sz > $CHUNK_SIZE; - #Return data in the event the caller does not support deflate - if ( !$deflate ) { + #Return data in the event the caller does not support deflate, or content is already compressed + if ( !$deflate || $incompressible_mime{$ft} ) { push( @headers, "Content-Length" => $sz ); # Append server-timing headers @@ -353,7 +365,7 @@ sub serve { return [ $code, \@headers, $fh ]; } - #Compress everything less than 1MB + #Compress text content (< 1MB, enforced by the streaming path above) push( @headers, "Content-Encoding" => "gzip" ); my $dfh; IO::Compress::Gzip::gzip( $fh => \$dfh ); @@ -652,12 +664,7 @@ sub _app { $last_fetch = DateTime::Format::HTTP->parse_datetime( $env->{HTTP_IF_MODIFIED_SINCE} )->epoch(); } - # Figure out if we want compression or not - my $alist = $env->{HTTP_ACCEPT_ENCODING} || ''; - $alist =~ s/\s//g; - my @accept_encodings; - @accept_encodings = split( /,/, $alist ); - my $deflate = grep { 'gzip' eq $_ } @accept_encodings; + my $deflate = _accepts_gzip( $env->{HTTP_ACCEPT_ENCODING} ); # Set the IP of the request so we can fail2ban $self->{ip} = $env->{HTTP_X_FORWARDED_FOR} || $env->{REMOTE_ADDR} || $self->{ip}; @@ -738,6 +745,12 @@ sub parse_ranges { return @ranges; } +sub _accepts_gzip { + my $alist = shift // ''; + $alist =~ s/\s//g; + return (grep { 'gzip' eq $_ } map { s/;.*//r } split( /,/, $alist )) ? 1 : 0; +} + my @executable_extensions = qw{cgi sh exe pl php py}; sub appears_executable { diff --git a/t/04-compression.t b/t/04-compression.t new file mode 100644 index 0000000..28c367b --- /dev/null +++ b/t/04-compression.t @@ -0,0 +1,126 @@ +#!/usr/bin/env perl + +# Tests for HTTP compression handling in TPSGI. +# Covers: +# 1. _accepts_gzip() correctly parses Accept-Encoding quality values +# 2. serve() skips gzip for already-compressed MIME types + +use strict; +use warnings; + +use Test::More; +use File::Temp qw{tempdir tempfile}; +use File::Basename qw{dirname}; +use File::Path qw{make_path}; + +use lib 't/lib'; +use TPSGITestStubs; +use FindBin::libs; + +use TPSGI; + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +my $tmpdir = tempdir(CLEANUP => 1); +my $logfile = "$tmpdir/tpsgi.log"; +my $user = scalar getpwuid($>); +my $http_grp = (getgrgid($)))[0]; + +sub _make_tpsgi { + my $self = bless { + user => $user, + http_user => $http_grp, + tpsgi_dir => $tmpdir, + basedir => '.', + log_dir => $tmpdir, + log_name => $logfile, + verbose => 0, + routes => [], + aliases => {}, + callbacks => [], + indices => [qw{index.html index.htm index.cgi}], + ip => '127.0.0.1', + }, 'TPSGI'; + return $self; +} + +sub _write_file { + my ($dir, $name, $content) = @_; + make_path($dir) unless -d $dir; + open(my $fh, '>', "$dir/$name") or die "Cannot write $dir/$name: $!"; + print $fh $content; + close $fh; + return "$dir/$name"; +} + +# --------------------------------------------------------------------------- +# 1. _accepts_gzip — Accept-Encoding parsing +# --------------------------------------------------------------------------- + +subtest '_accepts_gzip' => sub { + is( TPSGI::_accepts_gzip('gzip'), 1, 'plain gzip accepted' ); + is( TPSGI::_accepts_gzip('gzip, deflate'), 1, 'gzip with deflate accepted' ); + is( TPSGI::_accepts_gzip('gzip;q=0.9'), 1, 'gzip with quality value accepted' ); + is( TPSGI::_accepts_gzip('gzip;q=1.0, br'), 1, 'gzip q=1.0 with br accepted' ); + is( TPSGI::_accepts_gzip('br, gzip;q=0.5'), 1, 'gzip at end with quality accepted' ); + ok( !TPSGI::_accepts_gzip('deflate, br'), 'no gzip not accepted' ); + ok( !TPSGI::_accepts_gzip(''), 'empty string not accepted' ); + ok( !TPSGI::_accepts_gzip(undef), 'undef not accepted' ); + is( TPSGI::_accepts_gzip('gzip; q=0.0'), 1, 'q=0.0 still counted as listing gzip (q=0 filtering not in scope)' ); +}; + +# --------------------------------------------------------------------------- +# 2. serve() — compressible vs incompressible types +# --------------------------------------------------------------------------- + +subtest 'serve() skips gzip for already-compressed types' => sub { + my $tpsgi = _make_tpsgi(); + my $start = [Time::HiRes::gettimeofday()]; + + # Set up a www/ subtree so serve()'s static hardlink path stays inside tmpdir + my $www = "$tmpdir/www"; + make_path($www); + + # Write a small text file — should get gzip'd + my $txt_path = _write_file($www, 'hello.txt', 'Hello, world! ' x 100); + local $@; + my $resp = eval { $tpsgi->serve("http://localhost/hello.txt", $txt_path, $start, 0, [], 0, 1) }; + SKIP: { + skip "serve() died: $@", 1 if $@; + my %hdrs = @{ $resp->[1] }; + is( $hdrs{'Content-Encoding'}, 'gzip', 'text/plain gets gzip encoded' ); + } + + # Write a fake PNG — should NOT get gzip'd + my $png_path = _write_file($www, 'img.png', 'FAKEPNGDATA' x 10); + $resp = $tpsgi->serve("http://localhost/img.png", $png_path, $start, 0, [], 0, 1); + my %hdrs = @{ $resp->[1] }; + ok( !exists $hdrs{'Content-Encoding'}, 'no Content-Encoding header for PNG' ); + + # Write a fake JPEG — should NOT get gzip'd + my $jpg_path = _write_file($www, 'photo.jpg', 'FAKEJPEGDATA' x 10); + $resp = $tpsgi->serve("http://localhost/photo.jpg", $jpg_path, $start, 0, [], 0, 1); + %hdrs = @{ $resp->[1] }; + ok( !exists $hdrs{'Content-Encoding'}, 'no Content-Encoding header for JPEG' ); + + # Write a fake ZIP — should NOT get gzip'd + my $zip_path = _write_file($www, 'archive.zip', 'PK' . 'X' x 50); + $resp = $tpsgi->serve("http://localhost/archive.zip", $zip_path, $start, 0, [], 0, 1); + %hdrs = @{ $resp->[1] }; + ok( !exists $hdrs{'Content-Encoding'}, 'no Content-Encoding header for ZIP' ); +}; + +subtest 'serve() skips gzip when deflate=0' => sub { + my $tpsgi = _make_tpsgi(); + my $start = [Time::HiRes::gettimeofday()]; + make_path("$tmpdir/www2"); + my $txt_path = _write_file("$tmpdir/www2", 'plain.txt', 'Some text content'); + + my $resp = $tpsgi->serve("http://localhost/plain.txt", $txt_path, $start, 0, [], 0, 0); + my %hdrs = @{ $resp->[1] }; + ok( !exists $hdrs{'Content-Encoding'}, 'no gzip when deflate=0' ); +}; + +done_testing; diff --git a/t/lib/TPSGITestStubs.pm b/t/lib/TPSGITestStubs.pm new file mode 100644 index 0000000..11c58da --- /dev/null +++ b/t/lib/TPSGITestStubs.pm @@ -0,0 +1,131 @@ +package TPSGITestStubs; + +# Stubs for TPSGI dependencies that may not be installed in the test environment. +# Each stub first attempts to load the real module — if that succeeds we do +# nothing. Only when the real module is absent do we register a minimal fake. +# +# Load this BEFORE 'use TPSGI' or 'use TPSGI::Startup' so Perl won't try +# to find the missing .pm files on disk. + +use strict; +use warnings; + +sub import { + _maybe_stub('Linux::Perl::inotify', 'Linux/Perl/inotify.pm', \&_stub_linux_perl_inotify); + _maybe_stub('HTTP::Parser::XS', 'HTTP/Parser/XS.pm', \&_stub_http_parser_xs); + _maybe_stub('CGI::Emulate::PSGI', 'CGI/Emulate/PSGI.pm', \&_stub_cgi_emulate_psgi); + _maybe_stub('Log::Dispatch', 'Log/Dispatch.pm', \&_stub_log_dispatch); + _maybe_stub('Log::Dispatch::Screen', 'Log/Dispatch/Screen.pm', \&_stub_log_dispatch_screen); + _maybe_stub('Log::Dispatch::FileRotate', 'Log/Dispatch/FileRotate.pm', \&_stub_log_dispatch_filerotate); + _maybe_stub('URL::Encode', 'URL/Encode.pm', \&_stub_url_encode); + _maybe_stub('DateTime::Format::HTTP', 'DateTime/Format/HTTP.pm', \&_stub_datetime_format_http); +} + +sub _maybe_stub { + my ($pkg, $path, $stubber) = @_; + return if $INC{$path}; # already loaded (real or stubbed) + return if eval { require $path; 1 }; # real module loadable — use it + $stubber->(); # install the stub +} + +sub _stub_linux_perl_inotify { + package Linux::Perl::inotify; + sub new { bless { _wds => [] }, shift } + sub add { my $self = shift; my $wd = {}; push @{$self->{_wds}}, $wd; return $wd } + sub read { return () } + $INC{'Linux/Perl/inotify.pm'} = 1; + package TPSGITestStubs; +} + +sub _stub_http_parser_xs { + package HTTP::Parser::XS; + use constant HEADERS_AS_HASHREF => 1; + sub parse_http_response { + my ($buf, $mode) = @_; + my ($status, $message) = (200, 'OK'); + my %headers; + for my $line (split /\r?\n/, $buf) { + if ($line =~ m{^HTTP/1\.(\d)\s+(\d+)\s+(.+)}) { + $status = $2 + 0; + $message = $3; + } elsif ($line =~ m{^([^:]+):\s*(.+)}) { + $headers{lc $1} = $2; + } + } + return (1, undef, 1, $status, $message, \%headers); + } + sub import { + my $class = shift; + my $caller = caller; + no strict 'refs'; + for my $sym (@_) { + *{"${caller}::${sym}"} = \&{"HTTP::Parser::XS::${sym}"}; + } + } + $INC{'HTTP/Parser/XS.pm'} = 1; + package TPSGITestStubs; +} + +sub _stub_cgi_emulate_psgi { + package CGI::Emulate::PSGI; + sub emulate_environment { return () } + $INC{'CGI/Emulate/PSGI.pm'} = 1; + package TPSGITestStubs; +} + +sub _stub_log_dispatch { + package Log::Dispatch; + sub new { bless { handlers => [] }, shift } + sub add { push @{$_[0]{handlers}}, $_[1] } + { + no strict 'refs'; + for my $lvl (qw{debug info notice warning error critical alert emergency}) { + *{"Log::Dispatch::$lvl"} = sub { }; + } + *{"Log::Dispatch::log_and_die"} = sub { die $_[-1] }; + } + $INC{'Log/Dispatch.pm'} = 1; + package TPSGITestStubs; +} + +sub _stub_log_dispatch_screen { + package Log::Dispatch::Screen; + sub new { bless {}, shift } + $INC{'Log/Dispatch/Screen.pm'} = 1; + package TPSGITestStubs; +} + +sub _stub_log_dispatch_filerotate { + package Log::Dispatch::FileRotate; + sub new { bless {}, shift } + $INC{'Log/Dispatch/FileRotate.pm'} = 1; + package TPSGITestStubs; +} + +sub _stub_url_encode { + package URL::Encode; + sub url_params_mixed { + my $qs = shift // ''; + my %params; + for my $pair (split /&/, $qs) { + my ($k, $v) = split /=/, $pair, 2; + next unless defined $k && length $k; + $k =~ s/\+/ /g; $k =~ s/%([0-9A-Fa-f]{2})/chr hex $1/ge; + $v //= ''; $v =~ s/\+/ /g; $v =~ s/%([0-9A-Fa-f]{2})/chr hex $1/ge; + $params{$k} = $v; + } + return \%params; + } + $INC{'URL/Encode.pm'} = 1; + package TPSGITestStubs; +} + +sub _stub_datetime_format_http { + package DateTime::Format::HTTP; + sub parse_datetime { bless { epoch => 0 }, ref($_[0]) || $_[0] } + sub epoch { $_[0]->{epoch} } + $INC{'DateTime/Format/HTTP.pm'} = 1; + package TPSGITestStubs; +} + +1;