-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfilter.pl
116 lines (104 loc) · 2.27 KB
/
filter.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
use Getopt::Long;
use Pod::Usage;
# Program identity, printed as a banner when --help is requested.
my $NAME    = "Data Filterer";
my $VERSION = "1.0.0";

# Values populated from the command line.
my $file;                               # --file: path of the input file
my (@inputFilters, @inputKeep);         # raw --filter / --keep values, in order given
my ($ignoreRows, $caseInsensitive, $help) = (0, 0, 0);

# Lookup hashes (pattern text => 1) built later from the raw option lists.
my (%keep, %filter);

# Option specification: name[:type] => destination.
# ":s" means the option takes an optional string value; --filter and --keep
# may be repeated because their destinations are arrays.
my @optionSpec = (
    "file:s"          => \$file,
    "filter:s"        => \@inputFilters,
    "keep:s"          => \@inputKeep,
    "ignoreheaders:s" => \$ignoreRows,
    "caseinsensitive" => \$caseInsensitive,
    "help"            => \$help,
);
GetOptions(@optionSpec);
# --help takes priority over every other option: show the banner and the
# usage text from man.pod, then stop.
if ($help) {
    print $NAME, ", version ", $VERSION, "\n";
    pod2usage(-input => "man.pod", -verbose => 1, -exitval => 0);
    exit(0);
}

# An input file is mandatory.
die "Please provide a file\n" unless $file;

# When a header-row count is given it has to be a positive integer.
die "ignoreheaders must be a number\n"
    if $ignoreRows && $ignoreRows !~ /^[1-9]\d*$/;

# Without at least one filter there is nothing to remove from the file.
die "No filter specified. Nothing to do.\n" unless scalar(@inputFilters);

# Convert the raw option lists into lookup hashes (pattern => 1); duplicates
# collapse into a single key.
$filter{$_} = 1 for @inputFilters;
$keep{$_}   = 1 for @inputKeep;
# Derive the output path by inserting "-filtered" in front of the extension
# (data.csv -> data-filtered.csv). The extension may not contain a path
# separator, so a dot in a directory name is never mistaken for one. A file
# without any extension simply gets "-filtered" appended.
my $newFile;
if ($file =~ m{\A(.+)\.([^./\\]*)\z}s) {
    $newFile = $1 . "-filtered." . $2;
}
else {
    $newFile = $file . "-filtered";
}

# Three-argument open with lexical handles; "or" (not "||") so that the die
# is attached to open() itself and a failure is actually reported.
open(my $in, '<', $file)
    or die "Cannot open '$file' for reading: $!\n";
open(my $out, '>', $newFile)
    or die "Cannot open '$newFile' for writing: $!\n";

my $rowsCopied = 0;     # number of header rows passed through so far
my $fullLine   = "";    # logical record being assembled (may span several lines)

while (my $line = <$in>) {
    # The first $ignoreRows lines are headers: copy them verbatim, unfiltered.
    if ($ignoreRows && $rowsCopied < $ignoreRows) {
        print {$out} $line;
        $rowsCopied++;
        next;
    }

    # A line starting with whitespace is treated as a continuation of the
    # previous line and is appended to the record being assembled.
    if ($line =~ /^\s/) {
        $fullLine .= $line;
        next;
    }

    # A new record starts here: emit the completed previous record (if there
    # is one and it passes the filters), then start collecting the new one.
    print {$out} $fullLine if $fullLine ne "" && isValid($fullLine);
    $fullLine = $line;
}

# Flush the final record, which no following line has terminated.
print {$out} $fullLine if $fullLine ne "" && isValid($fullLine);

close($in);
# Buffered write errors only surface when the handle is closed.
close($out)
    or die "Cannot finish writing '$newFile': $!\n";
# Backslash-escape regular-expression metacharacters in a string so that it
# can be interpolated into a pattern and matched literally.
#
# Parameter: the text to escape.
# Returns:   a copy of the text with a backslash inserted in front of each of
#            . ^ $ * + ? ( ) [ ] { } \ / |
sub regexEscape {
    my ($text) = @_;
    # The set of characters that receive a leading backslash.
    my $special = qr/[.^\$*+?()\[\]{}\\\/|]/;
    $text =~ s/($special)/\\$1/g;
    return $text;
}
# Return 1 when $line contains at least one of the given strings, else 0.
# Every string is matched literally (metacharacters are escaped first) and
# the comparison honours the --caseinsensitive setting.
sub matchesAny {
    my ($line, @needles) = @_;
    foreach my $needle (@needles) {
        my $pattern = regexEscape($needle);
        # The (?:...) wrapper keeps an empty needle from becoming Perl's
        # special empty pattern, which would silently re-run whichever
        # regex last matched successfully instead of matching everything.
        my $found = $caseInsensitive
            ? $line =~ /(?:$pattern)/i
            : $line =~ /(?:$pattern)/;
        return 1 if $found;
    }
    return 0;
}

# Decide whether a record should be written to the output file.
#
# Parameter: the record text (one line plus any continuation lines).
# Returns:   1 to keep the record, 0 to drop it.
#
# A record is dropped when it contains any --filter string, unless it also
# contains a --keep string, in which case the keep wins. Keep strings are
# escaped exactly like filter strings, so regex metacharacters in them match
# literally and cannot abort the run with a pattern compile error.
sub isValid {
    my $line = shift;
    return 1 if matchesAny($line, keys %keep);
    return matchesAny($line, keys %filter) ? 0 : 1;
}