0
点赞
收藏
分享

微信扫一扫

邻居子系统1.4

booksmg2014 2022-06-24 阅读 63

1.4.1:状态定时器回调neigh_timer_handler

设置定时器来处理那些需要定时器处理的状态,定时器回调函数为neigh_timer_handler;函数会根据状态机变换规则对状态进行切换,切换状态后,如果需要更新输出函数则更新,并更新定时器下一次超时时间;其中NUD_INCOMPLETE | NUD_PROBE状态需要发送邻居请求,如果超过最大次数,则释放缓存中的数据包;主要包含邻居项状态的转换以及邻居项solicit请求相关的函数

在申请邻居项的内存函数neigh_alloc里,会创建该定时器,并会将定时器的超时处理函数设置为neigh_timer_handler。

/* Called when a timer expires for a neighbour entry. */
/*

对于处于reach状态的邻居项:
1、如果当前时间距确认时间confirmed,还未到超时时限reachable_time,则将定时器时间设置为邻居项的超时时限reachable_time
2、当前时间已晚于确认时间加上超时时限(confirmed + reachable_time),但未超过邻居项最近使用时间加上delay_probe_time(used + delay_probe_time),则将状态设置为DELAY。
这个状态的改变条件,我感觉设置的很巧妙。
一般是进入stale状态的邻居项,在超时前有数据时,则进入Delay状态。
3、当前时间晚于used+delay_probe_time,说明在confirmed+reachable_time超时前的短暂时间
内没有数据发送,此时即将状态设置为STALE,

对于Delay状态的邻居项:
1、当前时间小于confirmed+delay_probe_time时,说明邻居项可能在定时器超时函数刚执行时
即已经更新了confirmed时间,此时即可以将邻居项的状态设置为reach (confirmed会在neigh_update里被更新)
2、否则说明该邻居项在delay_probe_time超时后,还没有被外部确认,此时就需要将邻居项的状态设置为probe,准备发送solicit请求
对于probe与incomplete状态的邻居项,此时需要将定时器的下一次超时时间设置为retrans_time,如果在下一次超时前,还没有得到确认,则还会执行该定时器处理函数

对于probe与incomplete状态的邻居项:
1、如果已经超过了最大发包次数,则将邻居项的状态设置FAILED,并调neigh_invalidate,发送错误报告,并释放缓存的数据包
2、如果还没有超过最大发包次数,则调用solicit,发送邻居项solicit请求。
*/

/*
 * Timer callback for a neighbour entry. arg carries the struct neighbour
 * pointer. Under neigh->lock the handler walks the NUD state machine:
 *   REACHABLE -> stays REACHABLE, or moves to DELAY or STALE
 *   DELAY     -> REACHABLE or PROBE
 *   PROBE / INCOMPLETE -> FAILED once the probe budget is used up,
 *                         otherwise another probe is sent
 * It then re-arms the timer if the resulting state is still timer-driven,
 * optionally notifies listeners, and drops a reference on the entry.
 */
static void neigh_timer_handler(unsigned long arg)
{
unsigned long now, next;
struct neighbour *neigh = (struct neighbour *)arg;
unsigned int state;
int notify = 0;

write_lock(&neigh->lock);

state = neigh->nud_state;
now = jiffies;
next = now + HZ;
/* Entry is not in a timer-driven state: nothing to do, just unlock */
if (!(state & NUD_IN_TIMER))
goto out;

if (state & NUD_REACHABLE) {/* REACHABLE state */
if (time_before_eq(now,
neigh->confirmed + neigh->parms->reachable_time)) {/* confirmation has not expired yet: only schedule the next timeout */
NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
next = neigh->confirmed + neigh->parms->reachable_time;
} else if (time_before_eq(now,
neigh->used + neigh->parms->delay_probe_time)) { /* confirmation expired, but the entry was used within delay_probe_time */
NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
neigh->nud_state = NUD_DELAY; /* enter DELAY */
neigh->updated = jiffies;
neigh_suspect(neigh); /* per the original note: updates the output function (helper not shown here) */
next = now + neigh->parms->delay_probe_time;
} else {/* both the confirmation window and the idle window have expired */
NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
neigh->nud_state = NUD_STALE; /* enter STALE; next stays at its default and is unused because STALE is handled as non-timer below only if NUD_IN_TIMER excludes it — TODO confirm */
neigh->updated = jiffies;
neigh_suspect(neigh); /* per the original note: updates the output function */
notify = 1;
}
} else if (state & NUD_DELAY) {/* DELAY state */
if (time_before_eq(now,
neigh->confirmed + neigh->parms->delay_probe_time)) {// a confirmation was recorded within the last delay_probe_time
NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
neigh->nud_state = NUD_REACHABLE;/* back to REACHABLE */
neigh->updated = jiffies;
neigh_connect(neigh); /* NOTE(review): presumably switches to the connected output path — helper not shown */
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else { /* no confirmation within delay_probe_time: start probing */
NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
neigh->nud_state = NUD_PROBE;
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0); /* restart the probe counter */
notify = 1;
next = now + neigh->parms->retrans_time;
}
} else {
/* NUD_PROBE|NUD_INCOMPLETE: retry after retrans_time */
next = now + neigh->parms->retrans_time;
}
/* Probing state and the probe budget is exhausted: mark the entry FAILED */
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
neigh->nud_state = NUD_FAILED;
notify = 1;
neigh_invalidate(neigh); /* per the accompanying text: reports the error and frees queued packets */
}
/* Still in a timer-driven state: re-arm the timer, at least HZ/2 from now */
if (neigh->nud_state & NUD_IN_TIMER) {
if (time_before(next, jiffies + HZ/2))
next = jiffies + HZ/2;
/* NOTE(review): mod_timer() returning 0 should mean the timer was not pending, so a reference is taken for it — confirm */
if (!mod_timer(&neigh->timer, next))
neigh_hold(neigh);
}
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
/* neigh->lock is not released by this function on this path;
 * NOTE(review): neigh_probe() presumably drops it — confirm */
neigh_probe(neigh);
} else {
out:
write_unlock(&neigh->lock);
}
/* Tell interested parties about the state change */
if (notify)
neigh_update_notify(neigh);

/* NOTE(review): presumably drops the reference held on behalf of the timer that just fired */
neigh_release(neigh);
}

 

邻居项状态的更新函数3
第三个邻居项状态的更新函数,通过__neigh_event_send;

首先说明一下 Routing与 Neighboring subsystem的关联

1、在路由过程中,需要寻找或创建 struct dst_entry (另一种形式是 struct rtable)。 dst_entry 通过neighbour 域与 struct neighbour 关联。

每个 dst_entry 对应一个 neighbour,这样在路由之后,立刻能找到对应的 neighbour,此后,数据包通过 neighbour->output 送到链路层。

以 UDP 包的发送过程为例,这个过程如下

Udp_sendmsg() ==> ip_route_output() 
        ==> udp_push_pending_frames()==》udp_send_skb==》ip_send_skb==》ip_local_out==》dst_output==》skb->dst->output

Ip_route_output_slow() : 当查不到路由 cache 后,根据 route rule ,通过 dst_alloc() 创建一个 dst_entry 结构,这同时也是一个 rtable 结构,然后将 dst_entry 的 output 指向 ip_output();

此后,udp_sendmsg 继续调用 ip_send_skb() 来发包;

rth->u.dst.output=ip_output;

Udp_sendmsg() ==> udp_push_pending_frames ==> udp_send_skb==> ip_send_skb==>ip_local_out==》skb->dst->output()//这里的 output 就是 ip_output()

ip_output ==> __ip_finish_output() ==> ip_finish_output2() ==> dst_neigh_output()

因此,最终数据包是通过dst_neigh_output  也就是 neighbour->output() 往下送的。

IPv4 代码实现:ip_route_output在路由 cache 中查不到路由结果后,查找__mkroute_output->rt_dst_alloc-> route rule ,如果没有合适的路由规则,则失败返回。否则,通过 dst_alloc() 创建一个 dst_entry 结构,这同时也是一个 rtable 结构,此 rtable 结构被挂入 hash 表中。这时候我们已经有了下一跳的 L3地址。(也可能没有,例如绑定 interface 的情况,需要看代码是如何处理的)。

/*
 * dst_ops table for IPv4 route entries. rt_dst_alloc() passes this table
 * to dst_alloc(), which stores it in dst->ops.
 */
static struct dst_ops ipv4_dst_ops = {
.family = AF_INET,
.protocol = cpu_to_be16(ETH_P_IP),
.check = ipv4_dst_check,
.default_advmss = ipv4_default_advmss,
.mtu = ipv4_mtu,
.cow_metrics = ipv4_cow_metrics,
.destroy = ipv4_dst_destroy,
.ifdown = ipv4_dst_ifdown,
.negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure,
.update_pmtu = ip_rt_update_pmtu,
.redirect = ip_do_redirect,
.local_out = __ip_local_out,
.neigh_lookup = ipv4_neigh_lookup,// per the original note: the hook that ties an rtable to the neighbour table
};


/*
 * Allocate an IPv4 route entry through dst_alloc() using ipv4_dst_ops.
 *
 * @dev:        device handed to dst_alloc()
 * @nopolicy:   when true, DST_NOPOLICY is set in the dst flags
 * @noxfrm:     when true, DST_NOXFRM is set in the dst flags
 * @will_cache: when false, DST_HOST | DST_NOCACHE are set in the dst flags
 *
 * The entry is created with an initial reference count of 1 and
 * obsolete = DST_OBSOLETE_FORCE_CHK. Returns whatever dst_alloc() returns
 * (NULL on failure).
 */
static struct rtable *rt_dst_alloc(struct net_device *dev,
				   bool nopolicy, bool noxfrm, bool will_cache)
{
	unsigned short dst_flags = 0;

	if (!will_cache)
		dst_flags |= DST_HOST | DST_NOCACHE;
	if (nopolicy)
		dst_flags |= DST_NOPOLICY;
	if (noxfrm)
		dst_flags |= DST_NOXFRM;

	return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
			 dst_flags);
}

/*
 * Allocate and initialise a dst_entry from ops->kmem_cachep.
 *
 * @ops:              protocol dst_ops table; stored in dst->ops
 * @dev:              device to attach; a reference is taken when non-NULL
 * @initial_ref:      initial value of dst->__refcnt
 * @initial_obsolete: initial value of dst->obsolete
 * @flags:            DST_* flags; unless DST_NOCOUNT is set the entry is
 *                    added to the ops entry counter
 *
 * Returns the new entry, or NULL if the gc hook reports failure or the
 * allocation fails. input/output both start as dst_discard; the caller is
 * expected to install the real handlers afterwards.
 */
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
		int initial_ref, int initial_obsolete, unsigned short flags)
{
	struct dst_entry *dst;

	/* Over the gc threshold: run the protocol's gc hook first and give
	 * up if it returns non-zero. */
	if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
		if (ops->gc(ops))
			return NULL;
	}
	dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
	if (!dst)
		return NULL;
	dst->child = NULL;
	dst->dev = dev;
	if (dev)
		dev_hold(dev);
	dst->ops = ops;		/* remember the protocol ops table */
	dst_init_metrics(dst, dst_default_metrics, true);
	dst->expires = 0UL;
	dst->path = dst;
#ifdef CONFIG_XFRM
	dst->xfrm = NULL;
#endif
	dst->input = dst_discard;
	dst->output = dst_discard;	/* placeholder set at creation time */
	dst->error = 0;
	dst->obsolete = initial_obsolete;
	dst->header_len = 0;
	dst->trailer_len = 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
	dst->tclassid = 0;
#endif
	atomic_set(&dst->__refcnt, initial_ref);
	dst->__use = 0;
	dst->lastuse = jiffies;
	dst->flags = flags;
	dst->pending_confirm = 0;
	dst->next = NULL;
	if (!(flags & DST_NOCOUNT))
		dst_entries_add(ops, 1);
	return dst;
}

 

 

下一步,在老版本的内核中要通过ip_finish_output2 将 rtable 与 neighbour 进行绑定;

arp_bind_neighbour() 根据给定的下一跳 L3 地址,到 arp hash 表中找 neighbour,找到的话,dst->neighbour 就有了归宿;找不到,只好调用 neigh_create() 创建一个新的 neighbour,这是在__neigh_lookup_errno() 中完成的

但是新版本中都是缓存下一跳地址,所以路由表和neigh表分开;现在是先找到下一跳IP,再直接对该IP运行对应的neigh相关协议,找到IP对应的MAC;

可以看到dst_neigh_output(dst, neigh, skb); 虽然传入的dst参数,但是其实际没有使用dst->ops函数去处理rtable 和neighbour的bind关系

1.1 ip_finish_output2() 
1.2 nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr)
1.3 __ipv4_neigh_lookup_noref(dev, nexthop)
  if (!neigh)
    neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
1.4 if(neigh)
  dst_neigh_output(dst, neigh, skb);
/*
 * Send skb through neighbour n on behalf of route entry dst.
 *
 * If dst->pending_confirm is set, n->confirmed is refreshed to the current
 * jiffies and the flag is cleared. The packet then goes through
 * neigh_hh_output() when the neighbour is in a NUD_CONNECTED state and has
 * a cached header (hh_len != 0); otherwise it goes through n->output().
 *
 * Returns the result of whichever output routine was called.
 */
static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
				   struct sk_buff *skb)
{
	struct hh_cache *cached_hdr = &n->hh;

	if (unlikely(dst->pending_confirm)) {
		n->confirmed = jiffies;
		dst->pending_confirm = 0;
	}

	/* Slow path: not connected, or no cached header available. */
	if (!(n->nud_state & NUD_CONNECTED) || !cached_hdr->hh_len)
		return n->output(n, skb);

	return neigh_hh_output(cached_hdr, skb);
}
/*neigh_alloc() 用于分配 neighbour 结构
neigh_create() 进一步设置此结构,对于 ARP 来说,它调用 arp_constructor() ,在这个函数里面,对 neighbour 的 ops 域和 output 域进行设置。
Ops 域,根据底层 driver 的类型进行不同的设置,
对于没有链路层地址的,指向arp_direct_ops
对于没有链路层 cache 的,指向arp_generic_ops
对于有链路层 cache 的, 指向arp_hh_ops
*/

邻居子系统1.4_#if

/*
 * ARP-specific constructor for a freshly allocated neighbour entry.
 *
 * Looks up the in_device of neigh->dev, classifies the address, swaps
 * neigh->parms for a clone of the device's arp_parms, and then chooses
 * neigh->ops, neigh->output and (for addresses that need no resolution)
 * the initial NUD_NOARP state and hardware address.
 *
 * Returns 0 on success, -EINVAL if the device has no in_device.
 */
static int arp_constructor(struct neighbour *neigh)
{
	__be32 addr = *(__be32 *)neigh->primary_key;
	struct net_device *dev = neigh->dev;
	struct in_device *in_dev;
	struct neigh_parms *parms;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev == NULL) {
		rcu_read_unlock();
		return -EINVAL;
	}

	neigh->type = inet_addr_type(dev_net(dev), addr);

	parms = in_dev->arp_parms;
	__neigh_parms_put(neigh->parms);
	neigh->parms = neigh_parms_clone(parms);
	rcu_read_unlock();

	if (!dev->header_ops) {
		/* No header_ops on the device: use the direct ops, no ARP */
		neigh->nud_state = NUD_NOARP;
		neigh->ops = &arp_direct_ops;
		neigh->output = neigh_direct_output;
	} else {
		/* Good devices (checked by reading texts, but only Ethernet is
		   tested)

		   ARPHRD_ETHER: (ethernet, apfddi)
		   ARPHRD_FDDI: (fddi)
		   ARPHRD_IEEE802: (tr)
		   ARPHRD_METRICOM: (strip)
		   ARPHRD_ARCNET:
		   etc. etc. etc.

		   ARPHRD_IPDDP will also work, if author repairs it.
		   I did not it, because this driver does not work even
		   in old paradigm.
		 */

#if 1
		/* So... these "amateur" devices are hopeless.
		   The only thing, that I can say now:
		   It is very sad that we need to keep ugly obsolete
		   code to make them happy.

		   They should be moved to more reasonable state, now
		   they use rebuild_header INSTEAD OF hard_start_xmit!!!
		   Besides that, they are sort of out of date
		   (a lot of redundant clones/copies, useless in 2.1),
		   I wonder why people believe that they work.
		 */
		switch (dev->type) {
		default:
			break;
		case ARPHRD_ROSE:
#if IS_ENABLED(CONFIG_AX25)
		case ARPHRD_AX25:
#if IS_ENABLED(CONFIG_NETROM)
		case ARPHRD_NETROM:
#endif
			neigh->ops = &arp_broken_ops;
			neigh->output = neigh->ops->output;
			return 0;
#else
			break;
#endif
		}
#endif
		if (neigh->type == RTN_MULTICAST) {
			neigh->nud_state = NUD_NOARP;
			arp_mc_map(addr, neigh->ha, dev, 1);
		} else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
			neigh->nud_state = NUD_NOARP;
			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
		} else if (neigh->type == RTN_BROADCAST ||
			   (dev->flags & IFF_POINTOPOINT)) {
			neigh->nud_state = NUD_NOARP;
			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
		}
		/* Pick the neigh_ops table: header-cache capable or generic */
		if (dev->header_ops->cache)
			neigh->ops = &arp_hh_ops;
		else
			neigh->ops = &arp_generic_ops;

		/* Already-valid entries use the connected output routine */
		if (neigh->nud_state & NUD_VALID)
			neigh->output = neigh->ops->connected_output;
		else
			neigh->output = neigh->ops->output;
	}
	return 0;
}

arp_constructor 解析如下

邻居子系统1.4_缓存_02邻居子系统1.4_#if

/*
 * ARP-specific constructor for a freshly allocated neighbour entry.
 * Classifies the address, replaces neigh->parms with a clone of the
 * device's arp_parms, and selects neigh->ops / neigh->output (plus the
 * NUD_NOARP state and hardware address where no resolution is needed).
 * Returns 0 on success, -EINVAL if the device has no in_device.
 */
static int arp_constructor(struct neighbour *neigh)
{
__be32 addr = *(__be32 *)neigh->primary_key;
struct net_device *dev = neigh->dev;
struct in_device *in_dev;
struct neigh_parms *parms;

rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (in_dev == NULL) {
rcu_read_unlock();
return -EINVAL;
}

neigh->type = inet_addr_type(dev_net(dev), addr);

parms = in_dev->arp_parms;
__neigh_parms_put(neigh->parms);
neigh->parms = neigh_parms_clone(parms);
rcu_read_unlock();

if (!dev->header_ops) {// device has no header_ops: assign the direct ops, no ARP
neigh->nud_state = NUD_NOARP;
neigh->ops = &arp_direct_ops;
neigh->output = neigh_direct_output;
} else {
/* Good devices (checked by reading texts, but only Ethernet is
tested)

ARPHRD_ETHER: (ethernet, apfddi)
ARPHRD_FDDI: (fddi)
ARPHRD_IEEE802: (tr)
ARPHRD_METRICOM: (strip)
ARPHRD_ARCNET:
etc. etc. etc.

ARPHRD_IPDDP will also work, if author repairs it.
I did not it, because this driver does not work even
in old paradigm.
*/

#if 1
/* So... these "amateur" devices are hopeless.
The only thing, that I can say now:
It is very sad that we need to keep ugly obsolete
code to make them happy.

They should be moved to more reasonable state, now
they use rebuild_header INSTEAD OF hard_start_xmit!!!
Besides that, they are sort of out of date
(a lot of redundant clones/copies, useless in 2.1),
I wonder why people believe that they work.
*/
switch (dev->type) {
default:
break;
case ARPHRD_ROSE:
#if IS_ENABLED(CONFIG_AX25)
case ARPHRD_AX25:
#if IS_ENABLED(CONFIG_NETROM)
case ARPHRD_NETROM:
#endif
neigh->ops = &arp_broken_ops;
neigh->output = neigh->ops->output;
return 0;
#else
break;
#endif
}
#endif
if (neigh->type == RTN_MULTICAST) {
neigh->nud_state = NUD_NOARP;
arp_mc_map(addr, neigh->ha, dev, 1);
} else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
neigh->nud_state = NUD_NOARP;
memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
} else if (neigh->type == RTN_BROADCAST ||
(dev->flags & IFF_POINTOPOINT)) {
neigh->nud_state = NUD_NOARP;
memcpy(neigh->ha, dev->broadcast, dev->addr_len);
}
/* Choose the neigh_ops table: header-cache capable or generic */
if (dev->header_ops->cache)
neigh->ops = &arp_hh_ops;
else
neigh->ops = &arp_generic_ops;

/* Already-valid entries use the connected output routine */
if (neigh->nud_state & NUD_VALID)
neigh->output = neigh->ops->connected_output;
else
neigh->output = neigh->ops->output;
}
return 0;
}

View Code

 

对于以太网驱动程序,它的 net_device 结构在初始化的时候,已经有了默认的 hard_header 和 hard_header_cache 函数

 

ether_setup()
dev->hard_header = eth_header;
dev->hard_header_cache = eth_header_cache;

 

 

默认情况下,它的 ops 指向 arp_hh_ops

对于arp来说:其ops函数有如下:

邻居子系统1.4_缓存_02邻居子系统1.4_#if

/*
 * neigh_ops chosen by arp_constructor() when the device has header_ops
 * but no header_ops->cache hook.
 */
static const struct neigh_ops arp_generic_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
.output = neigh_resolve_output,
.connected_output = neigh_connected_output,
};

/*
 * neigh_ops chosen by arp_constructor() when dev->header_ops->cache is
 * set. Both output and connected_output point at neigh_resolve_output.
 */
static const struct neigh_ops arp_hh_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
.output = neigh_resolve_output,
.connected_output = neigh_resolve_output,
};

/*
 * neigh_ops chosen by arp_constructor() for devices without header_ops.
 * No solicit or error_report hooks are provided.
 */
static const struct neigh_ops arp_direct_ops = {
.family = AF_INET,
.output = neigh_direct_output,
.connected_output = neigh_direct_output,
};

/*
 * neigh_ops chosen by arp_constructor() for the ARPHRD_ROSE / ARPHRD_AX25 /
 * ARPHRD_NETROM device types (only when CONFIG_AX25 is enabled).
 */
static const struct neigh_ops arp_broken_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
.output = neigh_compat_output,
.connected_output = neigh_compat_output,
};

View Code

对于 output 域,关键是看 neighbour 的状态,如果是有效状态,则设置为 ops->connected_output(),这样可以加快速度,

否则设置为 ops->output(),这样,需要进行 neighbor discovery 的处理

Neighbor Discovery 的 过程

从上面的状态机可以看到,当 neighbour 处于 INCOMPLETE、PROBE 状态的时候,会发送 Neighbor Solicit 包:

例如,通过 neigh_resolve_output() 导致新创建一个 neighbour 结构后,最后会调用 neigh->ops->solicit() 来发送 NS 包,对于 ARP 来说,就是 arp_solicit():

/*neigh_resolve_output() ==>  neigh_event_send() ==> 
__neigh_event_send() ==>neigh_probe--> neigh->ops->solicit(neigh, skb); ==> arp_solicit()*/

arp_solicit 调用 arp_send() 构造并发送 ARP request:

对于 INCOMPLETE 状态,需要发送一个新的 ARP 请求,它的目的 MAC 地址是广播地址,这样链路上所有节点都能收到此广播包;

对于 PROBE 状态, neighbour 中已经有了对端的 MAC 地址,此时发 ARP request 的目的只是验证这个映射还是有效的,因此此时发出的 ARP 包的目的 MAC 地址可以从 neighbour 中取到,是一个单播的 ARP 包。

 neigh_resolve_output 分析:

 

/*
 * Output routine for a neighbour whose link-layer address may still need
 * resolving.
 *
 * When neigh_event_send() returns 0 the link-layer header is built from
 * neigh->ha and the packet is queued with dev_queue_xmit(). When it
 * returns non-zero this function does nothing further with skb and
 * returns 0.
 *
 * Returns the dev_queue_xmit() result, 0 when neigh_event_send() returned
 * non-zero, or -EINVAL (after freeing skb) when skb has no dst or the
 * header could not be built.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
int rc = 0;

if (!dst)
goto discard;

if (!neigh_event_send(neigh, skb)) {// the return value matters: 0 means the entry is usable now (per the original note: checks neighbour state validity)
int err;
struct net_device *dev = neigh->dev;
unsigned int seq;

if (dev->header_ops->cache && !neigh->hh.hh_len) /* device can cache L2 headers and none is cached yet: initialise the cache */
neigh_hh_init(neigh, dst);

do { /* build the L2 header; retry if neigh->ha changed underneath us */
__skb_pull(skb, skb_network_offset(skb));
seq = read_seqbegin(&neigh->ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
} while (read_seqretry(&neigh->ha_lock, seq));

if (err >= 0) /* header built: transmit the packet */
rc = dev_queue_xmit(skb);
else
goto out_kfree_skb;
}
out:
return rc;
discard:
NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
dst, neigh);
/* falls through to the common error path */
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
goto out;
}

 

http代理服务器(3-4-7层代理)-网络事件库公共组件、内核kernel驱动 摄像头驱动 tcpip网络协议栈、netfilter、bridge 好像看过!!!! 但行好事 莫问前程 --身高体重180的胖子

举报

相关推荐

0 条评论