Docker ebtables数据包过滤流向分析
整体数据包过滤流向
ebtables log
# 设置ebtables追踪,系统默认ebtables没有规则
# Write the tracing helper to ebtables.sh.
# The heredoc delimiter is quoted so that "$1", "$count" and the command
# substitution are stored in the file verbatim instead of being expanded by
# the shell that runs this cat command.
cat <<-'EOF' > ebtables.sh
#!/bin/bash
# Trace frames through ebtables by logging at the head of every built-in chain.
# Usage:
#   bash ebtables.sh      insert the LOG rules
#   bash ebtables.sh d    delete the LOG rules and verify that none are left

# Insert one LOG rule at the top of each chain of the broute, nat and filter tables.
insert() {
  ebtables -t broute -I BROUTING --log --log-prefix 'ctc/ebtable/broute-BROUTING' --log-level debug
  ebtables -t nat -I PREROUTING --log --log-prefix 'ctc/ebtable/nat-PREROUTE' --log-level debug
  ebtables -t nat -I OUTPUT --log --log-prefix 'ctc/ebtable/nat-OUTPUT' --log-level debug
  ebtables -t nat -I POSTROUTING --log --log-prefix 'ctc/ebtable/nat-POSTROUTE' --log-level debug
  ebtables -t filter -I INPUT --log --log-prefix 'ctc/ebtable/filter-input' --log-level debug
  ebtables -t filter -I OUTPUT --log --log-prefix 'ctc/ebtable/filter-output' --log-level debug
  ebtables -t filter -I FORWARD --log --log-prefix 'ctc/ebtable/filter-forward' --log-level debug
}

# Delete the rules added by insert(); the rule specifications must match exactly.
delete() {
  ebtables -t broute -D BROUTING --log --log-prefix 'ctc/ebtable/broute-BROUTING' --log-level debug
  ebtables -t nat -D PREROUTING --log --log-prefix 'ctc/ebtable/nat-PREROUTE' --log-level debug
  ebtables -t nat -D OUTPUT --log --log-prefix 'ctc/ebtable/nat-OUTPUT' --log-level debug
  ebtables -t nat -D POSTROUTING --log --log-prefix 'ctc/ebtable/nat-POSTROUTE' --log-level debug
  ebtables -t filter -D INPUT --log --log-prefix 'ctc/ebtable/filter-input' --log-level debug
  ebtables -t filter -D OUTPUT --log --log-prefix 'ctc/ebtable/filter-output' --log-level debug
  ebtables -t filter -D FORWARD --log --log-prefix 'ctc/ebtable/filter-forward' --log-level debug
}

# Count the lines of ebtables-save output that still contain "ctc" and report
# the result. Returns 0 when no such line is left, 1 otherwise.
check() {
  local count
  # grep -c prints 0 (and exits 1) when nothing matches; only the printed count is used.
  count=$(ebtables-save | grep -c ctc)
  if [[ "$count" == "0" ]]; then
    echo "Delete Success"
  else
    echo "Delete Fail, Use the ebtables-save to check what rules still exist"
    return 1
  fi
}

# "d" as the first argument selects delete mode; anything else inserts the rules.
if [[ "${1:-}" == "d" ]]; then
  delete
  check
else
  insert
fi
EOF
# 运行脚本
[root@boy ~]# bash ebtables.sh
Container to Container
# 启动container1
[root@boy ~]# docker run -it busybox sh
/ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
5: eth0@if6: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue
link/ether 02:42:ac:11:00:02 brd ff:ff:ff:ff:ff:ff
inet 172.17.0.2/16 brd 172.17.255.255 scope global eth0
valid_lft forever preferred_lft forever
# 启动container2
[root@boy ~]# docker run -it busybox sh
/ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
7: eth0@if8: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue
link/ether 02:42:ac:11:00:03 brd ff:ff:ff:ff:ff:ff
inet 172.17.0.3/16 brd 172.17.255.255 scope global eth0
valid_lft forever preferred_lft forever
# container1 ping container2 一次(已提前ping过一次完成ARP学习,并已将日志中ARP相关的信息清除)
/ # ping -c1 172.17.0.3
PING 172.17.0.3 (172.17.0.3): 56 data bytes
64 bytes from 172.17.0.3: seq=0 ttl=64 time=0.149 ms
--- 172.17.0.3 ping statistics ---
1 packets transmitted, 1 packets received, 0% packet loss
round-trip min/avg/max = 0.149/0.149/0.149 ms
# 查看MAC信息
[root@boy ~]# ip link show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether 00:0c:29:ec:1c:2d brd ff:ff:ff:ff:ff:ff
3: ens36: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether 00:0c:29:ec:1c:37 brd ff:ff:ff:ff:ff:ff
4: docker0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 02:42:a1:24:c7:cb brd ff:ff:ff:ff:ff:ff
6: veth614c61c@if5: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master docker0 state UP mode DEFAULT group default
link/ether 06:0c:99:02:2c:bd brd ff:ff:ff:ff:ff:ff link-netnsid 0
8: vethc753005@if7: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master docker0 state UP mode DEFAULT group default
link/ether ba:65:60:a1:f8:51 brd ff:ff:ff:ff:ff:ff link-netnsid 1
[root@boy ~]# bridge fdb
01:00:5e:00:00:01 dev ens33 self permanent
33:33:00:00:00:01 dev ens33 self permanent
33:33:ff:b4:91:65 dev ens33 self permanent
01:00:5e:00:00:01 dev ens36 self permanent
33:33:00:00:00:01 dev ens36 self permanent
33:33:ff:b8:00:1f dev ens36 self permanent
33:33:00:00:00:01 dev docker0 self permanent
01:00:5e:00:00:01 dev docker0 self permanent
33:33:ff:24:c7:cb dev docker0 self permanent
02:42:a1:24:c7:cb dev docker0 master docker0 permanent
02:42:a1:24:c7:cb dev docker0 vlan 1 master docker0 permanent
02:42:ac:11:00:02 dev veth614c61c master docker0
06:0c:99:02:2c:bd dev veth614c61c vlan 1 master docker0 permanent
06:0c:99:02:2c:bd dev veth614c61c master docker0 permanent
33:33:00:00:00:01 dev veth614c61c self permanent
01:00:5e:00:00:01 dev veth614c61c self permanent
33:33:ff:02:2c:bd dev veth614c61c self permanent
ba:65:60:a1:f8:51 dev vethc753005 vlan 1 master docker0 permanent
02:42:ac:11:00:03 dev vethc753005 master docker0
ba:65:60:a1:f8:51 dev vethc753005 master docker0 permanent
33:33:00:00:00:01 dev vethc753005 self permanent
01:00:5e:00:00:01 dev vethc753005 self permanent
33:33:ff:a1:f8:51 dev vethc753005 self permanent
[root@boy ~]# brctl show
bridge name bridge id STP enabled interfaces
docker0 8000.0242a124c7cb no veth614c61c
vethc753005
[root@boy ~]# brctl showmacs docker0
port no mac addr is local? ageing timer
1 02:42:ac:11:00:02 no 158.28
2 02:42:ac:11:00:03 no 158.28
1 06:0c:99:02:2c:bd yes 0.00
1 06:0c:99:02:2c:bd yes 0.00
2 ba:65:60:a1:f8:51 yes 0.00
2 ba:65:60:a1:f8:51 yes 0.00
# 查看ebtables过滤信息
[root@boy ~]# dmesg
######################### ICMP REQUEST #######################
# 由于Container1 已经使用ARP学习到Container2的MAC,故而知道MAC dest,当流量从veth614c61c发送到Docker0时,开始进行过滤规则匹配,但是由于此时还没进行转发决策,此时并不知道这个数据包要转发给哪个接口
[ 223.305743] ctc/ebtable/broute-BROUTING IN=veth614c61c OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ac:11:00:03 proto = 0x0800
[ 223.305747] ctc/ebtable/nat-PREROUTE IN=veth614c61c OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ac:11:00:03 proto = 0x0800
# 这里进行forward决策后,知道要转发给vethc753005
[ 223.305751] ctc/ebtable/filter-forward IN=veth614c61c OUT=vethc753005 MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ac:11:00:03 proto = 0x0800
[ 223.305753] ctc/ebtable/nat-POSTROUTE IN= OUT=vethc753005 MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:ac:11:00:03 proto = 0x0800
# 回去和上边步骤一样
######################### ICMP REPLY ###########################
[ 223.305846] ctc/ebtable/broute-BROUTING IN=vethc753005 OUT= MAC source = 02:42:ac:11:00:03 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 223.305847] ctc/ebtable/nat-PREROUTE IN=vethc753005 OUT= MAC source = 02:42:ac:11:00:03 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 223.305849] ctc/ebtable/filter-forward IN=vethc753005 OUT=veth614c61c MAC source = 02:42:ac:11:00:03 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
[ 223.305850] ctc/ebtable/nat-POSTROUTE IN= OUT=veth614c61c MAC source = 02:42:ac:11:00:03 MAC dest = 02:42:ac:11:00:02 proto = 0x0800
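通过上述分析可知过滤流程如下:broute BROUTING → nat PREROUTING →(转发决策)→ filter FORWARD → nat POSTROUTING(ICMP request 与 ICMP reply 经过的链相同,只是方向相反)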
Host to Container
# 启动container1
[root@boy ~]# docker run -it busybox sh
/ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
9: eth0@if10: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue
link/ether 02:42:ac:11:00:02 brd ff:ff:ff:ff:ff:ff
inet 172.17.0.2/16 brd 172.17.255.255 scope global eth0
valid_lft forever preferred_lft forever
# host ping container1 一次(已提前ping过一次完成ARP学习,并已将日志中ARP相关的信息清除)
[root@boy ~]# ping -c1 172.17.0.2
PING 172.17.0.2 (172.17.0.2) 56(84) bytes of data.
64 bytes from 172.17.0.2: icmp_seq=1 ttl=64 time=0.065 ms
--- 172.17.0.2 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.065/0.065/0.065/0.000 ms
# 查看MAC信息
[root@boy ~]# ip link show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether 00:0c:29:ec:1c:2d brd ff:ff:ff:ff:ff:ff
3: ens36: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
link/ether 00:0c:29:ec:1c:37 brd ff:ff:ff:ff:ff:ff
4: docker0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 02:42:a1:24:c7:cb brd ff:ff:ff:ff:ff:ff
10: veth5b8e7d2@if9: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master docker0 state UP mode DEFAULT group default
link/ether 66:d2:9d:e1:77:c7 brd ff:ff:ff:ff:ff:ff link-netnsid 0
[root@boy ~]# bridge fdb
01:00:5e:00:00:01 dev ens33 self permanent
33:33:00:00:00:01 dev ens33 self permanent
33:33:ff:b4:91:65 dev ens33 self permanent
01:00:5e:00:00:01 dev ens36 self permanent
33:33:00:00:00:01 dev ens36 self permanent
33:33:ff:b8:00:1f dev ens36 self permanent
33:33:00:00:00:01 dev docker0 self permanent
01:00:5e:00:00:01 dev docker0 self permanent
33:33:ff:24:c7:cb dev docker0 self permanent
02:42:a1:24:c7:cb dev docker0 master docker0 permanent
02:42:a1:24:c7:cb dev docker0 vlan 1 master docker0 permanent
02:42:ac:11:00:02 dev veth5b8e7d2 master docker0
66:d2:9d:e1:77:c7 dev veth5b8e7d2 vlan 1 master docker0 permanent
66:d2:9d:e1:77:c7 dev veth5b8e7d2 master docker0 permanent
33:33:00:00:00:01 dev veth5b8e7d2 self permanent
01:00:5e:00:00:01 dev veth5b8e7d2 self permanent
33:33:ff:e1:77:c7 dev veth5b8e7d2 self permanent
[root@boy ~]# brctl show
bridge name bridge id STP enabled interfaces
docker0 8000.0242a124c7cb no veth5b8e7d2
[root@boy ~]# brctl showmacs docker0
port no mac addr is local? ageing timer
1 02:42:ac:11:00:02 no 131.17
1 66:d2:9d:e1:77:c7 yes 0.00
1 66:d2:9d:e1:77:c7 yes 0.00
# 查看ebtables过滤信息
[root@boy ~]# dmesg
# 本机ping构建的数据包根据路由需要从Docker0发出(也即源MAC为Docker0的MAC),且事先已通过ARP学习到容器的MAC(容器namespace和host不一样,对于host来说容器的MAC是不可见的,需要去学习),因此在OUTPUT阶段就已经知道出接口为veth5b8e7d2
[ 1906.877940] ctc/ebtable/filter-output IN= OUT=veth5b8e7d2 MAC source = 02:42:a1:24:c7:cb MAC dest = 02:42:ac:11:00:02 proto = 0x0800
# 此时POSTROUTING后,流量转发给veth5b8e7d2(然后由container去处理icmp request)
[ 1906.877943] ctc/ebtable/nat-POSTROUTE IN= OUT=veth5b8e7d2 MAC source = 02:42:a1:24:c7:cb MAC dest = 02:42:ac:11:00:02 proto = 0x0800
# 当容器处理完icmp request后,构建icmp reply发送给Docker0,然后开始BROUTING
[ 1906.877961] ctc/ebtable/broute-BROUTING IN=veth5b8e7d2 OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:a1:24:c7:cb proto = 0x0800
[ 1906.877962] ctc/ebtable/nat-PREROUTE IN=veth5b8e7d2 OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:a1:24:c7:cb proto = 0x0800
# 由于MAC dest 02:42:a1:24:c7:cb 为本机Docker0的MAC,故而会进INPUT
[ 1906.877964] ctc/ebtable/filter-input IN=veth5b8e7d2 OUT= MAC source = 02:42:ac:11:00:02 MAC dest = 02:42:a1:24:c7:cb proto = 0x0800
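由上述分析可知数据包过滤流程如下(按上面日志记录到的顺序):host → container(ICMP request):filter OUTPUT → nat POSTROUTING;container → host(ICMP reply):broute BROUTING → nat PREROUTING → filter INPUT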