1.流程控制
也支持正则匹配判断,一般在写复杂语句时使用
[root@study ~]# echo "123abc#456cde 789aaa#aaabbb "|xargs -n1|awk -F# '{if($2~/[0-9]/)print $2}'
456cde
[root@study ~]# echo "123abc#456cde 789aaa#aaabbb "|xargs -n1|awk -F# '{if($2!~/[0-9]/)print $2}'
aaabbb
[root@study ~]# echo "123abc#456cde 789aaa#aaabbb "|xargs -n1|awk -F# '$2!~/[0-9]/{print $2}'
aaabbb
多分支
[root@study ~]# awk '{if($1==4){print "1"} else if($2==5){print "2"}else if($3==6){print "3"}else {print "no"}}' file1
no
1
no
while语句
[root@study ~]# awk '{i=1;while(i<=NF){print $i;i++}}' file1
1
2
3
4
5
6
7
8
9
倒序打印
[root@study ~]# awk '{for(i=NF;i>=1;i--)print $i}' file1
3
2
1
6
5
4
9
8
7
#都换行了,这并不是我们要的结果。怎么改进呢?
[root@study ~]# awk '{for(i=NF;i>=1;i--)printf $i" ";print ""}' file1
3 2 1
6 5 4
9 8 7
排除第一列、倒数第一列:
[root@study ~]# awk '{for(i=2;i<=NF;i++){printf $i" "};print ""}' file1
2 3
5 6
8 9
[root@study ~]# awk '{for(i=1;i<=NF-1;i++){printf $i" "};print ""}' file1
1 2
4 5
7 8
IP加单引号
[root@study ~]# echo '10.10.10.1 10.10.10.2 10.10.10.3'|awk '{for(i=1;i<=NF;i++)printf "\047"$i"\047"}'
'10.10.10.1''10.10.10.2''10.10.10.3'
\047是单引号(')的八进制ASCII码,可以通过showkey -a命令查看
for循环遍历数组
[root@study ~]# seq -f "str%.g" 5|awk '{a[NR]=$0}END{for(v in a)print v,a[v]}'
4 str4
5 str5
1 str1
2 str2
3 str3
删除数组和元素
[root@study ~]# seq -f "str%.g" 5|awk '{a[NR]=$0}END{delete a;for(v in a)print v,a[v]}'
#空的
[root@study ~]# seq -f "str%.g" 5|awk '{a[NR]=$0}END{delete a[3];for(v in a)print v,a[v]}'
4 str4
5 str5
1 str1
2 str2
exit 退出程序,与shell的exit一样。退出值是0-255之间的数字。
[root@study ~]# seq 5|awk '{if($0~/3/)exit 1}'
[root@study ~]# echo $?
1
[root@study ~]# seq 5|awk '{if($0~/3/)exit(123)}'
[root@study ~]# echo $?
123
2.数组
统计相同字段出现的次数
[root@study ~]# tail /etc/services | awk '{a[$1]++}END{for(v in a)print v,"=>", a[v]}'
com-bardac-dw => 2
3gpp-cbsp => 1
iqobject => 2
matahari => 1
isnetserv => 2
blp5 => 2
[root@study ~]# tail /etc/services | awk '/blp5/{a[$1]++}END{for(v in a)print v,"=>", a[v]}'
blp5 => 2
统计TCP连接状态
[root@study ~]# netstat -antp|awk '/^tcp/{a[$6]++}END{for(v in a)print v,a[v]}'
LISTEN 4
ESTABLISHED 1
去重
[root@study ~]# tail /etc/services
3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol
isnetserv 48128/tcp # Image Systems Network Services
isnetserv 48128/udp # Image Systems Network Services
blp5 48129/tcp # Bloomberg locator
blp5 48129/udp # Bloomberg locator
com-bardac-dw 48556/tcp # com-bardac-dw
com-bardac-dw 48556/udp # com-bardac-dw
iqobject 48619/tcp # iqobject
iqobject 48619/udp # iqobject
matahari 49000/tcp # Matahari Broker
#不打印重复的行
[root@study ~]# tail /etc/services |awk '!a[$1]++'
3gpp-cbsp 48049/tcp # 3GPP Cell Broadcast Service Protocol
isnetserv 48128/tcp # Image Systems Network Services
blp5 48129/tcp # Bloomberg locator
com-bardac-dw 48556/tcp # com-bardac-dw
iqobject 48619/tcp # iqobject
matahari 49000/tcp # Matahari Broker
#打印重复的行
[root@study ~]# tail /etc/services |awk 'a[$1]++'
isnetserv 48128/udp # Image Systems Network Services
blp5 48129/udp # Bloomberg locator
com-bardac-dw 48556/udp # com-bardac-dw
iqobject 48619/udp # iqobject
统计每个相同字段的某字段总数
[root@study ~]# tail /etc/services |awk -F'[ /]+' '{a[$1]+=$2}END{for(v in a)print v,a[v]}'
com-bardac-dw 97112
3gpp-cbsp 48049
iqobject 97238
matahari 49000
isnetserv 96256
blp5 96258
多维数组
实际上awk并不支持真正的多维数组,而是在逻辑上模拟多维数组的访问方式。比如a["a","b"]=1,awk会使用SUBSEP(默认\034)作为下标分隔符,把多个下标拼接成一个字符串,实际存储的下标是a\034b。
[root@study ~]# awk 'BEGIN{a["x","y"]=123;for(v in a) print v,a[v]}'
xy 123
#我们可以重新赋值SUBSEP变量,改变下标默认分隔符:
[root@study ~]# awk 'BEGIN{SUBSEP=":";a["x","y"]=123;for(v in a) print v,a[v]}'
x:y 123
根据指定字段统计出现次数
[root@study ~]# cat file1
a 1
a 2
a 1
b 2
b 2
b 3
c 1
c 1
d 1
[root@study ~]# awk 'BEGIN{SUBSEP="-"}{a[$1,$2]++}END{for(v in a)print v,a[v]}' file1
b-2 2
b-3 1
c-1 2
a-1 2
a-2 1
d-1 1