forked from karlarao/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspace.pl
402 lines (374 loc) · 12.6 KB
/
space.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
#!/usr/bin/perl
#
# Author: Navshobh Magotra
# Desc: This script can be used to find largest files or directories in a filesystem.
# It supports some nice features also. e.g. to generate a mail template that can be sent to application support for cleaning up files.
# By default the script will show 10 largest files in the filesystem.
#
# Please run with [-h] or --help to get more help.
#
# Note: Not recommended to run on very busy Production servers with very big filesystems, as finding and sorting can consume a lot of CPU.
#
#
# 1.1 Wrote the splicehash function and made some changes so that the hash size never exceeds $MAX_LIMIT at any point in time, to keep memory usage in check. This
# can make the script slower, but a slow script is far preferable to a slow server. Also sets the priority of the script to the lowest value, 19.
#
# 1.2 Made changes so that it works on relative paths also. Added the [-j] option so that we can find files located directly inside the specified directory only.
#
setpriority(0, 0, 19); #sets the priority of the script to lowest value 19 so that it does not interfere with processing on busy systems
use strict;
use File::Find;
use Getopt::Long;
use Data::Dumper;
use Cwd qw(abs_path);
#use warnings 'File::Find';
#
# Global variables shared by the subroutines and the main program below.
#
# Command-line options, populated by GetOptions() in getcheckinput():
#   $dirflag  - set when --directory was given (report largest directories)
#   $fileflag - set when --file was given (report largest files)
#   $just     - set when --just was given (do not descend into sub-directories)
#   $fsize    - minimum size in bytes; defaults to 1024*1024 in getcheckinput()
#   $count    - number of entries to report; defaults to 10 in getcheckinput()
#   $tempflag - set when --template was given (wrap the report in a mail template)
#   $newtime  - starts as the --newer duration string and is replaced in
#               getcheckinput() by an epoch-seconds cutoff (0 when not given)
#   $oldtime  - starts as the --older duration string and is replaced in
#               getcheckinput() by an epoch-seconds cutoff
#   $helpflag - set when --help was given
#
my ($dirflag,$fsize,$just,$fileflag,$count,$tempflag,$newtime,$oldtime,$helpflag);
# %fnlhash - staging hash: name => [size, mtime, uid, gid]
# @sortfnl - keys of %truefnlhash ordered by size, largest first
my (%fnlhash,@sortfnl);
# %dirsizehash - staging hash: directory name => summed size of its plain files
# %truefnlhash - running "top $count" result that %fnlhash is merged into
# %truedirhash - running "top $count" result that %dirsizehash is merged into
my (%dirsizehash,%truefnlhash,%truedirhash);
# A staging hash is flushed into its running result as soon as it holds this
# many keys, which bounds memory use. --count must be smaller than this value.
my $MAX_LIMIT=100;
# The directory given on the command line (assigned in the main program).
my $subject;
#
# usage: print the command-line help text to the currently selected output
# handle and terminate the script with exit status 1.
# Takes no arguments and never returns to its caller.
#
sub usage
{
    # $0 and $MAX_LIMIT are interpolated into the text when it is built.
    my $help_text = <<"USAGE";
Usage: $0 [ -d | -f ] [-c] [-t] [-o <duration>] [-n <duration>] <<filesystem>>
--directory [-d] Print the largest directories inside the filesystem
--file [-f] Print the largest files inside the filesystem
--just [-j] Find files just in the directory specified. Do not descend directories. To be used with [-f] option.
--size [-s] Specify filesize in bytes. Only search for files of size greater than specified size. Default value is 1MB.
--count [-c] Specify count of number of largest files or directories to be printed. Default value is 10. Max value is $MAX_LIMIT.
--template [-t] Generate and print a nice mail template that can be sent to support people for cleaning up files.
--older [-o] Only take into account files or directories modified before specified duration i.e. older than
--newer [-n] Only take into account files or directories modified since specified duration i.e. newer than
[-o] and [-n] options take arguements where we can specify time duration in months,days and hours.
e.g. 20m,20d and 20h would mean 20 months,20 days and 20 hours respectively.
Note: You cannot specify both [-d] and [-f] options at the same time.
Note: Only one filesystem name can be specified.
Note: The script will never traverse across different filesystems. It works exactly like -xdev option in find command.
e.g.
$0 -f /export/home (Display by default 10 largest files inside /export/home)
$0 -d /export/home (Display by default 10 largest directories inside /export/home)
$0 -f /export/home -j (Only find files just inside /export/home. Do not descend any directories)
$0 -f /export/home -c 50 (Display 50 largest files instead of default 10)
$0 -f /export/home -o 2m (Only consider files where last modified time is before last 2 months)
$0 -f /export/home -n 2m (Only consider files where last modified time is within last 2 months)
$0 -f /export/home -s 10000000 (Only consider files with size greater than 10 MB. Default is 1048576 bytes or 1MB)
USAGE
    print $help_text;
    exit 1;
}
#
# Command-line parsing and validation.
#

#
# _duration_to_cutoff: private helper for getcheckinput.
# Turns a duration such as "20m", "20d" or "20h" (months of 30 days, days,
# hours) into the epoch time lying that far before $now.
#   $duration - the raw option value
#   $now      - reference time in epoch seconds
# Returns the cutoff in epoch seconds, or undef when $duration does not start
# with digits followed by one of m, d or h.
#
sub _duration_to_cutoff
{
    my ($duration, $now) = @_;
    my %seconds_per = (m => 2592000, d => 86400, h => 3600);
    return unless defined $duration and $duration =~ /^(\d+)([mdh])/;
    return $now - ($1 * $seconds_per{$2});
}

#
# getcheckinput: read the options from @ARGV into the option globals and
# validate them. Calls usage() (which exits) when
#   - option parsing fails, --help is given, or there is not exactly one
#     non-option argument left;
#   - neither or both of [-d] and [-f] were given;
#   - the count is negative, or is not smaller than $MAX_LIMIT.
# On success $count and $fsize carry their defaults (10 and 1 MB) when they
# were not supplied or were 0, and $newtime / $oldtime hold epoch-second
# cutoffs instead of duration strings.
#
sub getcheckinput
{
    my $options=GetOptions
    (
        "directory" => \$dirflag,
        "file" => \$fileflag,
        "just" => \$just,
        "size=i" => \$fsize,
        "count=i" => \$count,
        "template" => \$tempflag,
        "older=s" => \$oldtime,
        "newer=s" => \$newtime,
        "help" => \$helpflag
    );
    if (!$options or defined $helpflag or scalar(@ARGV)!=1)
    {
        usage();
    }
    if (!(defined $fileflag xor defined $dirflag))
    {
        print "You have to specify one out of [-d] and [-f] options. And you cannot specify both of these. Exiting now !\n";
        usage();
    }
    # "count=i" accepts a leading minus sign. A negative value would later be
    # used as a splice offset and drop only the last N entries instead of
    # keeping the first N, so it is rejected here.
    if (defined $count and $count < 0)
    {
        print "You have to specify a positive count\n";
        usage();
    }
    if (defined $count and $count >=$MAX_LIMIT)
    {
        print "You have to specify count less than $MAX_LIMIT\n";
        usage();
    }
    $count ||= 10;
    $fsize ||= 1024*1024;

    my $now=time();

    # [-n]: only entries modified after this moment are considered.
    if (defined $newtime)
    {
        my $cutoff=_duration_to_cutoff($newtime, $now);
        if (defined $cutoff)
        {
            $newtime=$cutoff;
        }
        else
        {
            print "Invalid [-n] option supplied. Continuing with default value of 0\n";
            $newtime=0;
        }
    }
    else
    {
        $newtime=0;
    }

    # [-o]: only entries modified before this moment are considered.
    if (defined $oldtime)
    {
        my $cutoff=_duration_to_cutoff($oldtime, $now);
        if (defined $cutoff)
        {
            $oldtime=$cutoff;
        }
        else
        {
            print "Invalid [-o] option supplied. Continuing with default value of Current time\n";
            $oldtime=$now+3600;
        }
    }
    else
    {
        $oldtime=$now+10000; #Buffer time for script
    }
}
#
# convertsize: render a byte count in human readable form.
#   $bytes - size in bytes
# Returns "<n> B" for values below 1024, otherwise the value scaled to KB, MB
# or GB with two decimals. GB is the largest unit used.
#
sub convertsize
{
    my ($bytes) = @_;

    # Below 1 KB the value is shown exactly as it was passed in.
    return $bytes . ' B' if $bytes < 1024;

    # Step up one unit at a time until the value fits below the next unit.
    my $divisor = 1024;
    foreach my $unit ('KB', 'MB')
    {
        return sprintf("%1.2f", $bytes / $divisor) . " $unit"
            if $bytes < $divisor * 1024;
        $divisor *= 1024;
    }

    # Everything else is reported in GB.
    return sprintf("%1.2f", $bytes / $divisor) . ' GB';
}
#
# sorthashkeys: order the keys of a hash of array refs by size.
#   $attrs_for - reference to a hash whose values are array refs with the
#                size stored in element 0
# Returns the keys with the largest size first.
#
sub sorthashkeys
{
    my ($attrs_for) = @_;

    my @largest_first = sort {
        my $size_a = $attrs_for->{$a}[0];
        my $size_b = $attrs_for->{$b}[0];
        $size_b <=> $size_a
    } keys %{$attrs_for};

    # Returned as an array so that a scalar-context caller gets the count.
    return @largest_first;
}
#
# printoutput: print the result table (name, size, user, group, last modified).
#   $hashref - reference to a hash: name => [size, mtime, uid, gid]
#   $sortref - reference to an array holding the names in print order
# Side effect: when $subject starts with "." it is replaced by its absolute
# path and a NOTE line naming that path is printed first.
# User names matching /^a\d{6}/ are shown as "login (field 6 of the passwd
# entry)"; ids that cannot be resolved are shown numerically.
#
sub printoutput
{
    my ($hashref, $sortref) = @_;

    if ($subject =~ /^\./)
    {
        $subject=abs_path $subject;
        print ("NOTE: All below files/directories are under $subject\n");
    }

    # First pass: find the column widths and resolve every distinct uid/gid
    # exactly once instead of querying the passwd/group database per row.
    my (%user_for, %group_for);
    my $long=0;
    my $namelen=0;
    foreach my $entry (@$sortref)
    {
        my $uid=$hashref->{$entry}->[2];
        my $gid=$hashref->{$entry}->[3];

        $long=length($entry) if (length($entry) > $long);

        if (!exists $user_for{$uid})
        {
            my ($login, $realname)=(getpwuid $uid)[0, 6];
            if (!defined $login or $login eq "")
            {
                $user_for{$uid}=$uid;           # no passwd entry: show the uid
            }
            elsif ($login =~ /^a\d{6}/)
            {
                $user_for{$uid}="$login ($realname)";
                $namelen=length($realname) if ($namelen < length($realname));
            }
            else
            {
                $user_for{$uid}=$login;
            }
        }

        if (!exists $group_for{$gid})
        {
            my $group=(getgrgid $gid)[0];
            $group_for{$gid}=(!defined $group or $group eq "") ? $gid : $group;
        }
    }
    $namelen=$namelen+11;

    my $ruler=("=" x ($long+$namelen+51))."\n";
    print $ruler;
    printf ("%-${long}s %-13s %-${namelen}s %-9s %-15s\n", "Name", "Size", "User","Group","Last Modified");
    print $ruler;

    # Second pass: one row per entry, in the order given by the caller.
    foreach my $entry (@$sortref)
    {
        my $attrs=$hashref->{$entry};
        printf ("%-${long}s %-13s %-${namelen}s %-9s %-10s\n",
            $entry,
            convertsize($attrs->[0]),
            $user_for{$attrs->[2]},
            $group_for{$attrs->[3]},
            scalar localtime $attrs->[1]);  # scalar context gives the date string
    }
}
#
# splicehash: merge a staging hash into a running result and trim the result.
#   $kept     - reference to the running result hash (name => attribute array ref)
#   $incoming - reference to the staging hash to merge in
# After the merge the result keeps only the $count largest entries, as ranked
# by sorthashkeys(), and the staging hash is emptied. This keeps the hashes
# from growing to an unmanageable size. Returns nothing.
#
sub splicehash
{
    my ($kept, $incoming) = @_;

    # Copy every staged entry across; an existing key is overwritten.
    @{$kept}{ keys %$incoming } = values %$incoming;

    # Rank by size and drop everything from position $count onwards.
    my @ranked = sorthashkeys($kept);
    splice @ranked,$count;

    my %survivors;
    @survivors{@ranked} = @{$kept}{@ranked};
    %$kept = %survivors;

    %$incoming = ();    # the staging hash starts empty again
    return;
}
#
# splicedirhash: merge staged directory sizes into the running totals and trim
# the totals to the $count largest directories.
#   $totals  - reference to the running hash (directory name => size)
#   $pending - reference to the staging hash (directory name => size)
# Sizes are ADDED to an existing total: a directory whose files were counted
# partly before and partly after an earlier flush must report the sum of both
# parts, not just the last part. The staging hash is emptied afterwards.
# Returns nothing.
#
sub splicedirhash
{
    my ($totals, $pending) = @_;

    foreach my $dir (keys %$pending)
    {
        $totals->{$dir} += $pending->{$dir};
    }

    # Largest first; keep the first $count directories only.
    my @ranked = sort { $totals->{$b} <=> $totals->{$a} } keys %$totals;
    splice(@ranked, $count) if @ranked > $count;

    my %kept;
    @kept{@ranked} = @{$totals}{@ranked};
    %$totals = %kept;

    %$pending = ();
    return;
}
#
# Main program: validate the command line, then walk the directory given as
# the only remaining argument with File::Find.
#   [-f]  collects plain files of at least $fsize bytes whose mtime lies
#         between $newtime and $oldtime into %fnlhash.
#   [-d]  sums the sizes of the plain files directly inside each directory
#         into %dirsizehash, then turns the largest directories into %fnlhash
#         entries.
# A staging hash is flushed into its running "top $count" hash whenever it
# holds $MAX_LIMIT keys. Entries on a device other than the start directory's
# are pruned, so the walk stays on one filesystem.
#
getcheckinput();
$subject=shift;
die "$subject: No Such Direcory Exists.\n" if (! -d $subject);
if ( defined $fileflag) #if biggest files are required
{
    find(
        {
            preprocess => \&condition,
            wanted => \&dothisfile
        }, $subject);

    # preprocess callback: with [-j] only plain files are passed on, so no
    # sub-directory is ever entered; without it the list is left untouched.
    sub condition
    {
        return @_ if ! defined $just;
        return grep {-f} @_;
    }

    # wanted callback for [-f].
    sub dothisfile
    {
        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = lstat($_);
        # Prune anything living on another device, then skip small entries.
        $File::Find::prune=($dev != $File::Find::topdev);
        return if ($File::Find::prune or ($size < $fsize));
        #Generate a hash of arrays with filename being the key and attributes the values
        $fnlhash{$File::Find::name} = [$size,$mtime,$uid,$gid] if (-f and $mtime>$newtime and $mtime<$oldtime);
        if ((scalar keys %fnlhash) == $MAX_LIMIT) # flush so that the hash never exceeds $MAX_LIMIT keys
        {
            splicehash(\%truefnlhash,\%fnlhash);
        }
    }
}
elsif (defined $dirflag) #if biggest directories are required
{
    find(
        {
            wanted => \&dothisdir
        }, $subject);

    # wanted callback for [-d].
    sub dothisdir
    {
        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = lstat($_);
        return if $File::Find::prune=($dev != $File::Find::topdev);
        return unless -f;
        # Flush BEFORE a new directory key is inserted. Flushing after the
        # insert would send the directory's first file to the running totals
        # on its own while the rest of its files start a fresh entry.
        # NOTE(review): this assumes File::Find reports the plain files of one
        # directory back to back - confirm against the File::Find in use.
        if (!exists $dirsizehash{$File::Find::dir} and (scalar keys %dirsizehash) >= $MAX_LIMIT)
        {
            splicedirhash(\%truedirhash,\%dirsizehash);
        }
        #Generate a hash of directories with directory names as keys and their sizes as values
        $dirsizehash{$File::Find::dir}+=$size;
    }

    # Merge whatever is still staged after the walk.
    splicedirhash(\%truedirhash,\%dirsizehash);
    foreach my $dirname (keys %truedirhash)
    {
        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = lstat($dirname);
        #Generate a hash of arrays with directory name being the key and attributes the values
        $fnlhash{$dirname}=[$truedirhash{$dirname},$mtime,$uid,$gid] if ($truedirhash{$dirname} > $fsize and $mtime>$newtime and $mtime<$oldtime);
    }
}
else
{
    # getcheckinput() insists on exactly one of [-d] / [-f]; this is a safety net.
    print "Something Unexpected occured. Exiting now !!\n";
    usage();
}
#
# Final report: merge what is still staged in %fnlhash, rank the result by
# size and print at most $count entries - either as a plain table or, with
# [-t], wrapped in a mail template. The script then exits with status 0.
#
splicehash(\%truefnlhash,\%fnlhash);
@sortfnl=sorthashkeys(\%truefnlhash);
splice @sortfnl,$count;

# Without [-t] the table alone is the whole output.
if (!defined $tempflag)
{
    printoutput(\%truefnlhash,\@sortfnl);
    exit 0;
}

#Generate a nice template e-mail to be sent to support people.
my $hostname=`hostname`;
chomp $hostname;
# A path starting with "." is shown in the mail as its absolute form.
my $subject_r=($subject =~ /^\./) ? abs_path($subject) : $subject;
print <<"MAIL";
#########
Subject:-
#########
Filesystem usage for $subject_r exceeds threshold on $hostname
######
Body:-
######
Hi Guys,
We have this alert on high filesystem usage for "$subject_r" on server "$hostname". Upon investigation its found that below files/directories are taking up most of the space. Can you please try to clean up some of these files/directories so that the usage can get below threshold and any kind of space issue be avoided.
MAIL
printoutput(\%truefnlhash,\@sortfnl);
print <<"MAIL";
Please be aware, that full filesystems can/will
- Continue to generate alerts
- Stop an application from functioning.
- Stop a server from functioning and lead to service outage.
MAIL
print ("\n");
exit 0;