-
Notifications
You must be signed in to change notification settings - Fork 151
/
awk.sh
executable file
·177 lines (100 loc) · 4.53 KB
/
awk.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/env bash
# POSIX 7. http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html
# Turing complete, but use it only for *ultra simple* POSIX text table field manipulation,
# e.g.: if a column equals X, print Y.
# it only has better golfing on that very limited problem set.
# For more sanity, use Python.
## Basic examples
# Every sample input below ends with a newline, so that awk is fed a valid POSIX text file.
# Example of where you should use it: print the second column if the first is `a`:
printf 'a 1\nb 2\na 3\n' | awk '$1 == "a" { print $2 }'
# Numeric comparison: print col 2 if col 1 equals 1.
# Fields that look like numbers are compared numerically, so `01` and `001` match too:
printf '01 a\n2 b\n001 c\n' | awk '$1 == 1 { print $2 }'
# Numeric comparison on another column: print col 1 if col 2 equals 1 (`01` matches):
printf 'a 01\nb 2\na 3\n' | awk '$2 == 1 { print $1 }'
# Line number operations: print every nth line:
seq 10 | awk 'NR % 3 == 0'
# Line number operations: delete every nth line:
seq 10 | awk 'NR % 3 != 0'
# GNU sed can do those with the `~` extension:
# http://superuser.com/questions/396536/how-to-keep-only-every-nth-line-of-a-file
## General syntax
# Similar to sed.
# A general awk program is of the type:
# BEGIN { STATEMENT_BEGIN }
# CONDITION0 { STATEMENT0 }
# CONDITION1 { STATEMENT1 }
# ...
# CONDITION_N { STATEMENT_N }
# END { STATEMENT_END }
# To put multiple statements on a single line, use `;`.
# Every pattern-action pair whose pattern is true runs for each record, in program order.
# The input ends with a newline so that awk is fed a valid POSIX text file.
printf '1 2\n3 4\n' | awk 'BEGIN{print "b"}1<2{print "l"}1<2{print "l2"; print "l3"}1>2{print "l4"}END{print "e"}'
#$'b\nl\nl2\nl3\nl\nl2\nl3\ne'
echo '0.5 0.5' | awk '{print $1 + $2}'
#1
# If a statement (action) is missing, print is implied:
[ "$(echo 'a' | awk '1')" = 'a' ] || exit 1
# If a condition (pattern) is missing, 1 (true) is implied:
[ "$(echo 'a' | awk '{print}')" = 'a' ] || exit 1
# - arithmetic: same as C: +, *, -, /
# - string comp: `==` and `!=`
# - POSIX ERE matching: `~ /re/` and `!~ /re/` (a match anywhere in the string counts, unless the regex is anchored with `^` and `$`)
# - if else for while: like C
## Variables
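# Used much as in C, but never declared.
# Uninitialized variables are 0 in numeric context and the empty string in string context.
# $0: entire record
# $n: the n-th field ($1, $2, ...)
# last field: $NF (second to last: $(NF-1))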
## FS
# Field (column) separator.
# It is a string, not a `/regex/` literal, so write the ERE without slashes: FS=':' FS='[[:space:]]' -F'[[:space:]]'
## OFS: output field separator
## RS: record (line) separator
## ORS: output record separator
## NF: number of fields
## NR: number of current record
## FNR: number of the current record within the current input file
## length: number of characters of the current record ($0), excluding the newline
## RS
# Record separator.
# The empty string is a special value: records are then separated by blank lines,
# so each record is a whole paragraph. Here only the paragraph containing `b1` is printed:
# - http://unix.stackexchange.com/questions/136218/grep-sed-or-awk-print-entire-paragraph-in-a-file-on-pattern-match
# - http://unix.stackexchange.com/questions/82944/how-to-grep-for-text-in-a-file-and-display-the-paragraph-that-has-the-text/82958#82958
if [ "$(printf '1 a0\na1\na2\n\nb0\nb1\nb2\n\nc0\nc1\nc2\n' | awk 'BEGIN { RS = "" } /b1/')" != "$(printf 'b0\nb1\nb2')" ]; then
  exit 1
fi
## String to int
# A string used in an arithmetic expression is converted to a number:
awk 'BEGIN { print "1" + 1 }'
#2
# Leading blanks in the string do not change the result:
awk 'BEGIN { print " 1" + 1 }'
#2
## print
# Comma-separated arguments are printed with a single space between them by default:
awk 'BEGIN { print "a", 1 }'
#'a 1'
# Expressions placed side by side without a comma are concatenated:
awk 'BEGIN { print "a" "b" }'
#'ab'
# With no arguments, `print` means `print $0`:
test "$(echo 'a' | awk '{ print }')" = 'a' || exit 1
# A pattern without an action prints the record:
test "$(echo 'a' | awk '1')" = 'a' || exit 1
# Two such patterns print the record twice:
test "$(echo 'a' | awk '1; 1')" = "$(printf 'a\na')" || exit 1
## next
# Stop processing the current record and move on to the next one.
## Subtract adjacent lines
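# TODO fails if the first is zero: the pattern `p` is then false on the second line, so nothing is printed for it.
# Using `NR > 1` as the pattern instead would not depend on the value of `p`.
#[ "$(printf '0\n1\n3\n6' | awk 'p{print $0-p}{p=$0}')" = "$(printf '1\n2\n3')" ] || exit 1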
## Applications
# Print the second field of every record whose first field is numerically equal to 1:
test "$(printf '1 a\n2 b\n01 c\n' | awk '{ if ($1 == 1) print $2 }')" = "$(printf 'a\nc')" || exit 1
# Same input, but compared as strings: `01` equals `1` in the numeric comparison above,
# but not in the string comparison here:
test "$(printf '1 a\n2 b\n01 c\n' | awk '{ if ($1 == "1") print $2 }')" = 'a' || exit 1
# Numeric comparison again, but stop after the first match:
test "$(printf '1 a\n2 b\n1 c\n' | awk '$1 == 1 { print $2; exit }')" = 'a' || exit 1
# Same, but select the record with an ERE instead of a comparison:
test "$(printf '1 a\n2 b\n1 c\n' | awk '$1 ~ /^1$/ { print $2; exit }')" = 'a' || exit 1
# Sum column:
#awk '{sum += $1} END {print sum}'
# Variables start as 0, so there is no need to initialize them.
# http://unix.stackexchange.com/questions/115998/how-do-i-multiply-and-sum-column-data-using-awk-and-or-sed
# Skip blank lines:
# http://stackoverflow.com/questions/11687216/awk-to-skip-the-blank-lines
# Reached only if none of the assertions above exited early.
printf '%s\n' 'ALL ASSERTS PASSED'