Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 23 additions & 10 deletions lib/TPSGI.pm
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use CGI::Emulate::PSGI;

use Date::Format qw{strftime};
use List::Util();
use File::Find;
use Sys::Hostname();
use Plack::MIME ();
use DateTime::Format::HTTP();
Expand Down Expand Up @@ -78,6 +77,19 @@ my %extra_types = (

my $ct = 'Content-type';

# MIME types that are already compressed — gzip would waste CPU and bloat responses

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a less than comprehensive list. I'd prefer if it was accurate for at least the list of mime types understood by Plack::MIME, given that's what we're using in practice to figure mimetypes.

my %incompressible_mime = map { $_ => 1 } qw{
image/jpeg image/png image/gif image/webp image/avif image/heic
audio/mpeg audio/ogg audio/mp4 audio/webm audio/flac
video/mp4 video/webm video/ogg video/quicktime video/x-msvideo
application/zip application/gzip application/x-bzip2 application/x-xz
application/x-7z-compressed application/zstd
application/pdf application/wasm
application/vnd.openxmlformats-officedocument.wordprocessingml.document
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
application/vnd.openxmlformats-officedocument.presentationml.presentation
};

#memoize
my $rq;

Expand Down Expand Up @@ -342,8 +354,8 @@ sub serve {
}
if $streaming && $sz > $CHUNK_SIZE;

#Return data in the event the caller does not support deflate
if ( !$deflate ) {
#Return data in the event the caller does not support deflate, or content is already compressed
if ( !$deflate || $incompressible_mime{$ft} ) {
push( @headers, "Content-Length" => $sz );

# Append server-timing headers
Expand All @@ -353,7 +365,7 @@ sub serve {
return [ $code, \@headers, $fh ];
}

#Compress everything less than 1MB
#Compress text content (< 1MB, enforced by the streaming path above)
push( @headers, "Content-Encoding" => "gzip" );
my $dfh;
IO::Compress::Gzip::gzip( $fh => \$dfh );
Expand Down Expand Up @@ -652,12 +664,7 @@ sub _app {
$last_fetch = DateTime::Format::HTTP->parse_datetime( $env->{HTTP_IF_MODIFIED_SINCE} )->epoch();
}

# Figure out if we want compression or not
my $alist = $env->{HTTP_ACCEPT_ENCODING} || '';
$alist =~ s/\s//g;
my @accept_encodings;
@accept_encodings = split( /,/, $alist );
my $deflate = grep { 'gzip' eq $_ } @accept_encodings;
my $deflate = _accepts_gzip( $env->{HTTP_ACCEPT_ENCODING} );

# Set the IP of the request so we can fail2ban
$self->{ip} = $env->{HTTP_X_FORWARDED_FOR} || $env->{REMOTE_ADDR} || $self->{ip};
Expand Down Expand Up @@ -738,6 +745,12 @@ sub parse_ranges {
return @ranges;
}

sub _accepts_gzip {
my $alist = shift // '';
$alist =~ s/\s//g;
return (grep { 'gzip' eq $_ } map { s/;.*//r } split( /,/, $alist )) ? 1 : 0;
}

my @executable_extensions = qw{cgi sh exe pl php py};

sub appears_executable {
Expand Down
126 changes: 126 additions & 0 deletions t/04-compression.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/usr/bin/env perl

# Tests for HTTP compression handling in TPSGI.
# Covers:
# 1. _accepts_gzip() correctly parses Accept-Encoding quality values
# 2. serve() skips gzip for already-compressed MIME types

use strict;
use warnings;

use Test::More;
use File::Temp qw{tempdir tempfile};
use File::Basename qw{dirname};
use File::Path qw{make_path};

use lib 't/lib';
use TPSGITestStubs;
use FindBin::libs;

use TPSGI;

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

my $tmpdir = tempdir(CLEANUP => 1);
my $logfile = "$tmpdir/tpsgi.log";
my $user = scalar getpwuid($>);
my $http_grp = (getgrgid($)))[0];

sub _make_tpsgi {
my $self = bless {
user => $user,
http_user => $http_grp,
tpsgi_dir => $tmpdir,
basedir => '.',
log_dir => $tmpdir,
log_name => $logfile,
verbose => 0,
routes => [],
aliases => {},
callbacks => [],
indices => [qw{index.html index.htm index.cgi}],
ip => '127.0.0.1',
}, 'TPSGI';
return $self;
}

sub _write_file {
my ($dir, $name, $content) = @_;
make_path($dir) unless -d $dir;
open(my $fh, '>', "$dir/$name") or die "Cannot write $dir/$name: $!";
print $fh $content;
close $fh;
return "$dir/$name";
}

# ---------------------------------------------------------------------------
# 1. _accepts_gzip — Accept-Encoding parsing
# ---------------------------------------------------------------------------

subtest '_accepts_gzip' => sub {
is( TPSGI::_accepts_gzip('gzip'), 1, 'plain gzip accepted' );
is( TPSGI::_accepts_gzip('gzip, deflate'), 1, 'gzip with deflate accepted' );
is( TPSGI::_accepts_gzip('gzip;q=0.9'), 1, 'gzip with quality value accepted' );
is( TPSGI::_accepts_gzip('gzip;q=1.0, br'), 1, 'gzip q=1.0 with br accepted' );
is( TPSGI::_accepts_gzip('br, gzip;q=0.5'), 1, 'gzip at end with quality accepted' );
ok( !TPSGI::_accepts_gzip('deflate, br'), 'no gzip not accepted' );
ok( !TPSGI::_accepts_gzip(''), 'empty string not accepted' );
ok( !TPSGI::_accepts_gzip(undef), 'undef not accepted' );
is( TPSGI::_accepts_gzip('gzip; q=0.0'), 1, 'q=0.0 still counted as listing gzip (q=0 filtering not in scope)' );
};

# ---------------------------------------------------------------------------
# 2. serve() — compressible vs incompressible types
# ---------------------------------------------------------------------------

subtest 'serve() skips gzip for already-compressed types' => sub {
my $tpsgi = _make_tpsgi();
my $start = [Time::HiRes::gettimeofday()];

# Set up a www/ subtree so serve()'s static hardlink path stays inside tmpdir
my $www = "$tmpdir/www";
make_path($www);

# Write a small text file — should get gzip'd
my $txt_path = _write_file($www, 'hello.txt', 'Hello, world! ' x 100);
local $@;
my $resp = eval { $tpsgi->serve("http://localhost/hello.txt", $txt_path, $start, 0, [], 0, 1) };
SKIP: {
skip "serve() died: $@", 1 if $@;
my %hdrs = @{ $resp->[1] };
is( $hdrs{'Content-Encoding'}, 'gzip', 'text/plain gets gzip encoded' );
}

# Write a fake PNG — should NOT get gzip'd
my $png_path = _write_file($www, 'img.png', 'FAKEPNGDATA' x 10);
$resp = $tpsgi->serve("http://localhost/img.png", $png_path, $start, 0, [], 0, 1);
my %hdrs = @{ $resp->[1] };
ok( !exists $hdrs{'Content-Encoding'}, 'no Content-Encoding header for PNG' );

# Write a fake JPEG — should NOT get gzip'd
my $jpg_path = _write_file($www, 'photo.jpg', 'FAKEJPEGDATA' x 10);
$resp = $tpsgi->serve("http://localhost/photo.jpg", $jpg_path, $start, 0, [], 0, 1);
%hdrs = @{ $resp->[1] };
ok( !exists $hdrs{'Content-Encoding'}, 'no Content-Encoding header for JPEG' );

# Write a fake ZIP — should NOT get gzip'd
my $zip_path = _write_file($www, 'archive.zip', 'PK' . 'X' x 50);
$resp = $tpsgi->serve("http://localhost/archive.zip", $zip_path, $start, 0, [], 0, 1);
%hdrs = @{ $resp->[1] };
ok( !exists $hdrs{'Content-Encoding'}, 'no Content-Encoding header for ZIP' );
};

subtest 'serve() skips gzip when deflate=0' => sub {
my $tpsgi = _make_tpsgi();
my $start = [Time::HiRes::gettimeofday()];
make_path("$tmpdir/www2");
my $txt_path = _write_file("$tmpdir/www2", 'plain.txt', 'Some text content');

my $resp = $tpsgi->serve("http://localhost/plain.txt", $txt_path, $start, 0, [], 0, 0);
my %hdrs = @{ $resp->[1] };
ok( !exists $hdrs{'Content-Encoding'}, 'no gzip when deflate=0' );
};

done_testing;
131 changes: 131 additions & 0 deletions t/lib/TPSGITestStubs.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package TPSGITestStubs;

# Stubs for TPSGI dependencies that may not be installed in the test environment.
# Each stub first attempts to load the real module — if that succeeds we do
# nothing. Only when the real module is absent do we register a minimal fake.
#
# Load this BEFORE 'use TPSGI' or 'use TPSGI::Startup' so Perl won't try
# to find the missing .pm files on disk.

use strict;
use warnings;

sub import {
_maybe_stub('Linux::Perl::inotify', 'Linux/Perl/inotify.pm', \&_stub_linux_perl_inotify);
_maybe_stub('HTTP::Parser::XS', 'HTTP/Parser/XS.pm', \&_stub_http_parser_xs);
_maybe_stub('CGI::Emulate::PSGI', 'CGI/Emulate/PSGI.pm', \&_stub_cgi_emulate_psgi);
_maybe_stub('Log::Dispatch', 'Log/Dispatch.pm', \&_stub_log_dispatch);
_maybe_stub('Log::Dispatch::Screen', 'Log/Dispatch/Screen.pm', \&_stub_log_dispatch_screen);
_maybe_stub('Log::Dispatch::FileRotate', 'Log/Dispatch/FileRotate.pm', \&_stub_log_dispatch_filerotate);
_maybe_stub('URL::Encode', 'URL/Encode.pm', \&_stub_url_encode);
_maybe_stub('DateTime::Format::HTTP', 'DateTime/Format/HTTP.pm', \&_stub_datetime_format_http);
}

sub _maybe_stub {
my ($pkg, $path, $stubber) = @_;
return if $INC{$path}; # already loaded (real or stubbed)
return if eval { require $path; 1 }; # real module loadable — use it
$stubber->(); # install the stub
}

sub _stub_linux_perl_inotify {
package Linux::Perl::inotify;
sub new { bless { _wds => [] }, shift }
sub add { my $self = shift; my $wd = {}; push @{$self->{_wds}}, $wd; return $wd }
sub read { return () }
$INC{'Linux/Perl/inotify.pm'} = 1;
package TPSGITestStubs;
}

sub _stub_http_parser_xs {
package HTTP::Parser::XS;
use constant HEADERS_AS_HASHREF => 1;
sub parse_http_response {
my ($buf, $mode) = @_;
my ($status, $message) = (200, 'OK');
my %headers;
for my $line (split /\r?\n/, $buf) {
if ($line =~ m{^HTTP/1\.(\d)\s+(\d+)\s+(.+)}) {
$status = $2 + 0;
$message = $3;
} elsif ($line =~ m{^([^:]+):\s*(.+)}) {
$headers{lc $1} = $2;
}
}
return (1, undef, 1, $status, $message, \%headers);
}
sub import {
my $class = shift;
my $caller = caller;
no strict 'refs';
for my $sym (@_) {
*{"${caller}::${sym}"} = \&{"HTTP::Parser::XS::${sym}"};
}
}
$INC{'HTTP/Parser/XS.pm'} = 1;
package TPSGITestStubs;
}

sub _stub_cgi_emulate_psgi {
package CGI::Emulate::PSGI;
sub emulate_environment { return () }
$INC{'CGI/Emulate/PSGI.pm'} = 1;
package TPSGITestStubs;
}

sub _stub_log_dispatch {
package Log::Dispatch;
sub new { bless { handlers => [] }, shift }
sub add { push @{$_[0]{handlers}}, $_[1] }
{
no strict 'refs';
for my $lvl (qw{debug info notice warning error critical alert emergency}) {
*{"Log::Dispatch::$lvl"} = sub { };
}
*{"Log::Dispatch::log_and_die"} = sub { die $_[-1] };
}
$INC{'Log/Dispatch.pm'} = 1;
package TPSGITestStubs;
}

sub _stub_log_dispatch_screen {
package Log::Dispatch::Screen;
sub new { bless {}, shift }
$INC{'Log/Dispatch/Screen.pm'} = 1;
package TPSGITestStubs;
}

sub _stub_log_dispatch_filerotate {
package Log::Dispatch::FileRotate;
sub new { bless {}, shift }
$INC{'Log/Dispatch/FileRotate.pm'} = 1;
package TPSGITestStubs;
}

sub _stub_url_encode {
package URL::Encode;
sub url_params_mixed {
my $qs = shift // '';
my %params;
for my $pair (split /&/, $qs) {
my ($k, $v) = split /=/, $pair, 2;
next unless defined $k && length $k;
$k =~ s/\+/ /g; $k =~ s/%([0-9A-Fa-f]{2})/chr hex $1/ge;
$v //= ''; $v =~ s/\+/ /g; $v =~ s/%([0-9A-Fa-f]{2})/chr hex $1/ge;
$params{$k} = $v;
}
return \%params;
}
$INC{'URL/Encode.pm'} = 1;
package TPSGITestStubs;
}

sub _stub_datetime_format_http {
package DateTime::Format::HTTP;
sub parse_datetime { bless { epoch => 0 }, ref($_[0]) || $_[0] }
sub epoch { $_[0]->{epoch} }
$INC{'DateTime/Format/HTTP.pm'} = 1;
package TPSGITestStubs;
}

1;