From 9e4505c459440a41fd466451cf840dec5c957eeb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 10 Nov 2007 21:18:39 -0800 Subject: [PATCH 01/39] [INET]: Add a missing include to inet_hashtables.h Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 8461cda37490..469216d93663 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include From 8dd71c5d28cd88d4400e7f474986e799e39aff37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=E4rvinen?= Date: Sat, 10 Nov 2007 21:20:59 -0800 Subject: [PATCH 02/39] [TCP]: Consider GSO while counting reord in sacktag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reordering detection fails to take account that the reordered skb may have pcount larger than 1. In such case the lowest of them had the largest reordering, the old formula used the highest of them which is pcount - 1 packets less reordered. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ca9590f4f520..0f757578f3bd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1403,8 +1403,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (in_sack < 0) break; - fack_count += tcp_skb_pcount(skb); - sacked = TCP_SKB_CB(skb)->sacked; /* Account D-SACK for retransmitted packet. */ @@ -1427,11 +1425,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } /* Nothing to do; acked frame is about to be dropped. 
*/ + fack_count += tcp_skb_pcount(skb); continue; } - if (!in_sack) + if (!in_sack) { + fack_count += tcp_skb_pcount(skb); continue; + } if (!(sacked&TCPCB_SACKED_ACKED)) { if (sacked & TCPCB_SACKED_RETRANS) { @@ -1480,6 +1481,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ flag |= FLAG_DATA_SACKED; tp->sacked_out += tcp_skb_pcount(skb); + fack_count += tcp_skb_pcount(skb); if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; @@ -1490,6 +1492,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); + + fack_count += tcp_skb_pcount(skb); } /* D-SACK. We can detect redundant retransmission @@ -1515,7 +1519,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss && (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) - tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); + tcp_update_reordering(sk, tp->fackets_out - reord, 0); #if FASTRETRANS_DEBUG > 0 BUG_TRAP((int)tp->sacked_out >= 0); From c7caf8d3ed7a6617aa0a3083815c439bd952c45c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=E4rvinen?= Date: Sat, 10 Nov 2007 21:22:18 -0800 Subject: [PATCH 03/39] [TCP]: Fix reord detection due to snd_una covered holes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes subtle bug like the one with fastpath_cnt_hint happening due to the way the GSO and hints interact. Because hints are not reset when just a GSOed skb is partially ACKed, there's no guarantee that the relevant part of the write queue is going to be processed in sacktag at all (skbs below snd_una) because fastpath hint can fast forward the entrypoint. This was also on the way of future reductions in sacktag's skb processing. Also future cleanups in sacktag can be made after this (in 2.6.25). 
This may make reordering update in tcp_try_undo_partial redundant but I'm not too sure so I left it there. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 50 ++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0f757578f3bd..9fc9096ada8a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1417,11 +1417,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if ((dup_sack && in_sack) && (sacked&TCPCB_SACKED_ACKED)) reord = min(fack_count, reord); - } else { - /* If it was in a hole, we detected reordering. */ - if (fack_count < prior_fackets && - !(sacked&TCPCB_SACKED_ACKED)) - reord = min(fack_count, reord); } /* Nothing to do; acked frame is about to be dropped. */ @@ -2634,7 +2629,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. 
*/ -static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) +static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, + int prior_fackets) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2643,6 +2639,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) int fully_acked = 1; int flag = 0; int prior_packets = tp->packets_out; + u32 cnt = 0; + u32 reord = tp->packets_out; s32 seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); @@ -2683,10 +2681,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) if ((flag & FLAG_DATA_ACKED) || (packets_acked > 1)) flag |= FLAG_NONHEAD_RETRANS_ACKED; - } else if (seq_rtt < 0) { - seq_rtt = now - scb->when; - if (fully_acked) - last_ackt = skb->tstamp; + } else { + if (seq_rtt < 0) { + seq_rtt = now - scb->when; + if (fully_acked) + last_ackt = skb->tstamp; + } + if (!(sacked & TCPCB_SACKED_ACKED)) + reord = min(cnt, reord); } if (sacked & TCPCB_SACKED_ACKED) @@ -2697,12 +2699,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) if ((sacked & TCPCB_URG) && tp->urg_mode && !before(end_seq, tp->snd_up)) tp->urg_mode = 0; - } else if (seq_rtt < 0) { - seq_rtt = now - scb->when; - if (fully_acked) - last_ackt = skb->tstamp; + } else { + if (seq_rtt < 0) { + seq_rtt = now - scb->when; + if (fully_acked) + last_ackt = skb->tstamp; + } + reord = min(cnt, reord); } tp->packets_out -= packets_acked; + cnt += packets_acked; /* Initial outgoing SYN's get put onto the write_queue * just like anything else we transmit. It is not @@ -2734,13 +2740,18 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p) tcp_ack_update_rtt(sk, flag, seq_rtt); tcp_rearm_rto(sk); + if (tcp_is_reno(tp)) { + tcp_remove_reno_sacks(sk, pkts_acked); + } else { + /* Non-retransmitted hole got filled? 
That's reordering */ + if (reord < prior_fackets) + tcp_update_reordering(sk, tp->fackets_out - reord, 0); + } + tp->fackets_out -= min(pkts_acked, tp->fackets_out); /* hint's skb might be NULL but we don't need to care */ tp->fastpath_cnt_hint -= min_t(u32, pkts_acked, tp->fastpath_cnt_hint); - if (tcp_is_reno(tp)) - tcp_remove_reno_sacks(sk, pkts_acked); - if (ca_ops->pkts_acked) { s32 rtt_us = -1; @@ -3023,6 +3034,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; u32 prior_in_flight; + u32 prior_fackets; s32 seq_rtt; int prior_packets; int frto_cwnd = 0; @@ -3047,6 +3059,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache); } + prior_fackets = tp->fackets_out; + if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. @@ -3088,7 +3102,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) prior_in_flight = tcp_packets_in_flight(tp); /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, &seq_rtt); + flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets); /* Guarantee sacktag reordering detection against wrap-arounds */ if (before(tp->frto_highmark, tp->snd_una)) From e49aa5d456802c6bec59b29d1d7cbd8e9cc71709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=E4rvinen?= Date: Sat, 10 Nov 2007 21:23:08 -0800 Subject: [PATCH 04/39] [TCP]: Add unlikely() to sacktag out-of-mem in fragment case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9fc9096ada8a..84bcdc94dfa9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1400,7 +1400,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* DSACK info lost if out-of-mem, try SACK still */ if (in_sack <= 0) in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); - if (in_sack < 0) + if (unlikely(in_sack < 0)) break; sacked = TCP_SKB_CB(skb)->sacked; From fbd52eb2bd17bd3468974aa2fdce140f0cd32fc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=E4rvinen?= Date: Sat, 10 Nov 2007 21:24:19 -0800 Subject: [PATCH 05/39] [TCP]: Split SACK FRTO flag clearing (fixes FRTO corner case bug) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case we run out of mem when fragmenting, the clearing of FLAG_ONLY_ORIG_SACKED might get missed which then feeds FRTO with false information. Move clearing outside skb processing loop so that it will get executed even if the skb loop terminates prematurely due to out-of-mem. Besides, now the core of the loop truly deals with a single skb only, which also enables creation of a more self-contained tcp_sacktag_one later on. In addition, a small reorganization of if branches was made. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 84bcdc94dfa9..20c9440ab85e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1444,12 +1444,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->retransmit_skb_hint = NULL; } } else { - /* New sack for not retransmitted frame, - * which was in hole. It is reordering. 
- */ - if (!(sacked & TCPCB_RETRANS) && - fack_count < prior_fackets) - reord = min(fack_count, reord); + if (!(sacked & TCPCB_RETRANS)) { + /* New sack for not retransmitted frame, + * which was in hole. It is reordering. + */ + if (fack_count < prior_fackets) + reord = min(fack_count, reord); + + /* SACK enhanced F-RTO (RFC4138; Appendix B) */ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) + flag |= FLAG_ONLY_ORIG_SACKED; + } if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; @@ -1458,18 +1463,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* clear lost hint */ tp->retransmit_skb_hint = NULL; } - /* SACK enhanced F-RTO detection. - * Set flag if and only if non-rexmitted - * segments below frto_highmark are - * SACKed (RFC4138; Appendix B). - * Clearing correct due to in-order walk - */ - if (after(end_seq, tp->frto_highmark)) { - flag &= ~FLAG_ONLY_ORIG_SACKED; - } else { - if (!(sacked & TCPCB_RETRANS)) - flag |= FLAG_ONLY_ORIG_SACKED; - } } TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; @@ -1503,6 +1496,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->retransmit_skb_hint = NULL; } } + + /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct + * due to in-order walk + */ + if (after(end_seq, tp->frto_highmark)) + flag &= ~FLAG_ONLY_ORIG_SACKED; } if (tp->retrans_out && From b1667609cd9a98ce32559e06f36fca54c775a51f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 10 Nov 2007 21:26:41 -0800 Subject: [PATCH 06/39] [IPV4]: Remove bogus goto-s from ip_route_input_slow Both places look like if (err == XXX) goto yyy; done: while both yyy targets look like err = XXX; goto done; so this is ok to remove the above if-s. yyy labels are used in other places and are not removed. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 21b12de9e653..c95b270ba350 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1813,11 +1813,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto martian_destination; err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); - if (err == -ENOBUFS) - goto e_nobufs; - if (err == -EINVAL) - goto e_inval; - done: in_dev_put(in_dev); if (free_res) From 03f49f345749abc08bc84b835433c94eea6e972b Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 10 Nov 2007 21:28:34 -0800 Subject: [PATCH 07/39] [NET]: Make helper to get dst entry and "use" it There are many places that get the dst entry, increase the __use counter and set the "lastuse" time stamp. Make a helper for this. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/dst.h | 7 +++++++ net/decnet/dn_route.c | 16 ++++------------ net/ipv4/route.c | 12 +++--------- net/ipv6/route.c | 6 +----- 4 files changed, 15 insertions(+), 26 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index e9ff4a4caef9..2f65e894b829 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -143,6 +143,13 @@ static inline void dst_hold(struct dst_entry * dst) atomic_inc(&dst->__refcnt); } +static inline void dst_use(struct dst_entry *dst, unsigned long time) +{ + dst_hold(dst); + dst->__use++; + dst->lastuse = time; +} + static inline struct dst_entry * dst_clone(struct dst_entry * dst) { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 97eee5e8fbbe..66663e5d7acd 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -293,9 +293,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route * dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); - rth->u.dst.__use++; - dst_hold(&rth->u.dst); - rth->u.dst.lastuse = now; + dst_use(&rth->u.dst, now); 
spin_unlock_bh(&dn_rt_hash_table[hash].lock); dnrt_drop(rt); @@ -308,9 +306,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route * rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); - dst_hold(&rt->u.dst); - rt->u.dst.__use++; - rt->u.dst.lastuse = now; + dst_use(&rt->u.dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); *rp = rt; return 0; @@ -1182,9 +1178,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl (flp->mark == rt->fl.mark) && (rt->fl.iif == 0) && (rt->fl.oif == flp->oif)) { - rt->u.dst.lastuse = jiffies; - dst_hold(&rt->u.dst); - rt->u.dst.__use++; + dst_use(&rt->u.dst, jiffies); rcu_read_unlock_bh(); *pprt = &rt->u.dst; return 0; @@ -1456,9 +1450,7 @@ int dn_route_input(struct sk_buff *skb) (rt->fl.oif == 0) && (rt->fl.mark == skb->mark) && (rt->fl.iif == cb->iif)) { - rt->u.dst.lastuse = jiffies; - dst_hold(&rt->u.dst); - rt->u.dst.__use++; + dst_use(&rt->u.dst, jiffies); rcu_read_unlock(); skb->dst = (struct dst_entry *)rt; return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c95b270ba350..45651834e1e2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -851,9 +851,7 @@ restart: */ rcu_assign_pointer(rt_hash_table[hash].chain, rth); - rth->u.dst.__use++; - dst_hold(&rth->u.dst); - rth->u.dst.lastuse = now; + dst_use(&rth->u.dst, now); spin_unlock_bh(rt_hash_lock_addr(hash)); rt_drop(rt); @@ -1930,9 +1928,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif == 0 && rth->fl.mark == skb->mark && rth->fl.fl4_tos == tos) { - rth->u.dst.lastuse = jiffies; - dst_hold(&rth->u.dst); - rth->u.dst.__use++; + dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); skb->dst = (struct dst_entry*)rth; @@ -2326,9 +2322,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & 
(IPTOS_RT_MASK | RTO_ONLINK))) { - rth->u.dst.lastuse = jiffies; - dst_hold(&rth->u.dst); - rth->u.dst.__use++; + dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); *rp = rth; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 973a97abc446..6ecb5e6fae2e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -544,12 +544,8 @@ restart: rt = rt6_device_match(rt, fl->oif, flags); BACKTRACK(&fl->fl6_src); out: - dst_hold(&rt->u.dst); + dst_use(&rt->u.dst, jiffies); read_unlock_bh(&table->tb6_lock); - - rt->u.dst.lastuse = jiffies; - rt->u.dst.__use++; - return rt; } From e9671fcb3bef1fe2e71aa0456bd5b7eec9e8de4d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 10 Nov 2007 21:36:04 -0800 Subject: [PATCH 08/39] [NET]: Fix infinite loop in dev_mc_unsync(). From: Joe Perches Based upon an initial patch and report by Luis R. Rodriguez. Signed-off-by: David S. Miller --- net/core/dev_mcast.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index ae354057d84c..647973daca2b 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -168,13 +168,13 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) da = from->mc_list; while (da != NULL) { next = da->next; - if (!da->da_synced) - continue; - __dev_addr_delete(&to->mc_list, &to->mc_count, - da->da_addr, da->da_addrlen, 0); - da->da_synced = 0; - __dev_addr_delete(&from->mc_list, &from->mc_count, - da->da_addr, da->da_addrlen, 0); + if (da->da_synced) { + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + da->da_synced = 0; + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + } da = next; } __dev_set_rx_mode(to); From 17ab56a260734aabf7f03cc97785dda81571ea24 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 10 Nov 2007 21:38:48 -0800 Subject: [PATCH 09/39] [PACKET]: Use existing sock refcnt debugging infrastructure The 
packet_socks_nr variable is used purely for debugging the number of sockets. As Arnaldo pointed out, there's already an infrastructure for this purpose, so switch to using it. Signed-off-by: Pavel Emelyanov Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/packet/af_packet.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4cb2dfba0993..36331a5f0abe 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -139,9 +139,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it) static HLIST_HEAD(packet_sklist); static DEFINE_RWLOCK(packet_sklist_lock); -static atomic_t packet_socks_nr; - - /* Private packet socket structures. */ struct packet_mclist @@ -236,10 +233,7 @@ static void packet_sock_destruct(struct sock *sk) return; } - atomic_dec(&packet_socks_nr); -#ifdef PACKET_REFCNT_DEBUG - printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr)); -#endif + sk_refcnt_debug_dec(sk); } @@ -849,6 +843,7 @@ static int packet_release(struct socket *sock) /* Purge queues */ skb_queue_purge(&sk->sk_receive_queue); + sk_refcnt_debug_release(sk); sock_put(sk); return 0; @@ -1010,7 +1005,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol) po->num = proto; sk->sk_destruct = packet_sock_destruct; - atomic_inc(&packet_socks_nr); + sk_refcnt_debug_inc(sk); /* * Attach a protocol block From c2b42336f4a733020360157ba629d37f1410923a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 10 Nov 2007 21:39:26 -0800 Subject: [PATCH 10/39] [IPX]: Use existing sock refcnt debugging infrastructure Just like in af_packet.c, the ipx_sock_nr variable is used for debugging purposes. Switch to using existing infrastructure. Thanks to Arnaldo for pointing this out. Signed-off-by: Pavel Emelyanov Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/ipx/af_ipx.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index a195a66e0cc7..c76a9523091b 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -92,11 +92,6 @@ extern int ipxrtr_route_skb(struct sk_buff *skb); extern struct ipx_route *ipxrtr_lookup(__be32 net); extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg); -#undef IPX_REFCNT_DEBUG -#ifdef IPX_REFCNT_DEBUG -atomic_t ipx_sock_nr; -#endif - struct ipx_interface *ipx_interfaces_head(void) { struct ipx_interface *rc = NULL; @@ -151,14 +146,7 @@ static void ipx_destroy_socket(struct sock *sk) { ipx_remove_socket(sk); skb_queue_purge(&sk->sk_receive_queue); -#ifdef IPX_REFCNT_DEBUG - atomic_dec(&ipx_sock_nr); - printk(KERN_DEBUG "IPX socket %p released, %d are still alive\n", sk, - atomic_read(&ipx_sock_nr)); - if (atomic_read(&sk->sk_refcnt) != 1) - printk(KERN_DEBUG "Destruction sock ipx %p delayed, cnt=%d\n", - sk, atomic_read(&sk->sk_refcnt)); -#endif + sk_refcnt_debug_dec(sk); sock_put(sk); } @@ -1384,11 +1372,8 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol) sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto); if (!sk) goto out; -#ifdef IPX_REFCNT_DEBUG - atomic_inc(&ipx_sock_nr); - printk(KERN_DEBUG "IPX socket %p created, now we have %d alive\n", sk, - atomic_read(&ipx_sock_nr)); -#endif + + sk_refcnt_debug_inc(sk); sock_init_data(sock, sk); sk->sk_no_check = 1; /* Checksum off by default */ sock->ops = &ipx_dgram_ops; @@ -1409,6 +1394,7 @@ static int ipx_release(struct socket *sock) sock_set_flag(sk, SOCK_DEAD); sock->sk = NULL; + sk_refcnt_debug_release(sk); ipx_destroy_socket(sk); out: return 0; From 99f933263ac30eafbb008d01ac1dd0adf40fc343 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 10 Nov 2007 21:47:39 -0800 Subject: [PATCH 11/39] [INET]: Add missed tunnel64_err handler The tunnel64_protocol uses the tunnel4_protocol's err_handler and thus calls the 
tunnel4_protocol's handlers. This is not very good, as in case of (icmp) error the wrong error handlers will be called (e.g. ipip ones instead of sit) and this won't be noticed at all, because the error is not reported. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/tunnel4.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index a794a8ca8b4f..d619d2e83f5d 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -118,6 +118,17 @@ static void tunnel4_err(struct sk_buff *skb, u32 info) break; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static void tunnel64_err(struct sk_buff *skb, u32 info) +{ + struct xfrm_tunnel *handler; + + for (handler = tunnel64_handlers; handler; handler = handler->next) + if (!handler->err_handler(skb, info)) + break; +} +#endif + static struct net_protocol tunnel4_protocol = { .handler = tunnel4_rcv, .err_handler = tunnel4_err, @@ -127,7 +138,7 @@ static struct net_protocol tunnel4_protocol = { #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static struct net_protocol tunnel64_protocol = { .handler = tunnel64_rcv, - .err_handler = tunnel4_err, + .err_handler = tunnel64_err, .no_policy = 1, }; #endif From 358352b8b83c67ecf75f6d7bc3e2d64bf0cf506a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 10 Nov 2007 21:48:54 -0800 Subject: [PATCH 12/39] [INET]: Cleanup the xfrm4_tunnel_(un)register Both check for the family to select an appropriate tunnel list. Consolidate this check and make the for() loop more readable. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv4/tunnel4.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index d619d2e83f5d..978b3fd61e65 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -17,6 +17,11 @@ static struct xfrm_tunnel *tunnel4_handlers; static struct xfrm_tunnel *tunnel64_handlers; static DEFINE_MUTEX(tunnel4_mutex); +static inline struct xfrm_tunnel **fam_handlers(unsigned short family) +{ + return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; +} + int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) { struct xfrm_tunnel **pprev; @@ -25,8 +30,7 @@ int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) mutex_lock(&tunnel4_mutex); - for (pprev = (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; - *pprev; pprev = &(*pprev)->next) { + for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { if ((*pprev)->priority > priority) break; if ((*pprev)->priority == priority) @@ -53,8 +57,7 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) mutex_lock(&tunnel4_mutex); - for (pprev = (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; - *pprev; pprev = &(*pprev)->next) { + for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { if (*pprev == handler) { *pprev = handler->next; ret = 0; From d932e04a5e7b146c5f9bf517714b986a432a7594 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 10 Nov 2007 21:51:40 -0800 Subject: [PATCH 13/39] [VLAN]: Don't synchronize addresses while the vlan device is down While the VLAN device is down, the unicast addresses are not configured on the underlying device, so we shouldn't attempt to sync them. Noticed by Dmitry Butskoy Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/8021q/vlan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 1037748c14db..0fadbc6fbc3f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -636,6 +636,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (!vlandev) continue; + flgs = vlandev->flags; + if (!(flgs & IFF_UP)) + continue; + vlan_sync_address(dev, vlandev); } break; From 39aaac114e192bce500204f9c9e1fffff4c2b519 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 10 Nov 2007 21:52:35 -0800 Subject: [PATCH 14/39] [VLAN]: Allow setting mac address while device is up Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan.c | 1 + net/8021q/vlan.h | 1 + net/8021q/vlan_dev.c | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 0fadbc6fbc3f..6567213959cb 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -376,6 +376,7 @@ void vlan_setup(struct net_device *new_dev) new_dev->init = vlan_dev_init; new_dev->open = vlan_dev_open; new_dev->stop = vlan_dev_stop; + new_dev->set_mac_address = vlan_set_mac_address; new_dev->set_multicast_list = vlan_dev_set_multicast_list; new_dev->change_rx_flags = vlan_change_rx_flags; new_dev->destructor = free_netdev; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index cf4a80d06b35..2cd1393073ec 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -60,6 +60,7 @@ int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev int vlan_dev_change_mtu(struct net_device *dev, int new_mtu); int vlan_dev_open(struct net_device* dev); int vlan_dev_stop(struct net_device* dev); +int vlan_set_mac_address(struct net_device *dev, void *p); int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd); void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, short vlan_prio); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c 
index 1a1740aa9a8b..7a36878241da 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -665,6 +665,32 @@ int vlan_dev_stop(struct net_device *dev) return 0; } +int vlan_set_mac_address(struct net_device *dev, void *p) +{ + struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; + struct sockaddr *addr = p; + int err; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + if (!(dev->flags & IFF_UP)) + goto out; + + if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) { + err = dev_unicast_add(real_dev, addr->sa_data, ETH_ALEN); + if (err < 0) + return err; + } + + if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) + dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN); + +out: + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + return 0; +} + int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; From 78608ba0326f1448f9a10dbb402a38192559f639 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 10 Nov 2007 21:53:30 -0800 Subject: [PATCH 15/39] [NET]: Fix skb_truesize_check() assertion The intent of the assertion in skb_truesize_check() is to check for skb->truesize being decremented too much by other code, resulting in a wraparound below zero. The type of the right side of the comparison causes the compiler to promote the left side to an unsigned type, despite the presence of an explicit type cast. This defeats the check for negativity. Ensure both sides of the comparison are a signed type to prevent the implicit type conversion. Signed-off-by: Chuck Lever Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 94e49915a8c0..91140fe8c119 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -387,7 +387,9 @@ extern void skb_truesize_bug(struct sk_buff *skb); static inline void skb_truesize_check(struct sk_buff *skb) { - if (unlikely((int)skb->truesize < sizeof(struct sk_buff) + skb->len)) + int len = sizeof(struct sk_buff) + skb->len; + + if (unlikely((int)skb->truesize < len)) skb_truesize_bug(skb); } From b226801676d9533d09da511eb379fe970fa1a770 Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Sat, 10 Nov 2007 21:54:50 -0800 Subject: [PATCH 16/39] [PKT_SCHED] CLS_U32: Use ffs() instead of C code on hash mask to get first set bit. Computing the rank of the first set bit in the hash mask (for using later in u32_hash_fold()) was done with plain C code. Using ffs() instead makes the code more readable and improves performance (since ffs() is better optimized in assembler). Using the conditional operator on hash mask before applying ntohl() also saves one ntohl() call if mask is 0. Signed-off-by: Radu Rendec Signed-off-by: Jarek Poplawski Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 53171029439f..c39008209164 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -613,17 +613,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); n->ht_up = ht; n->handle = handle; -{ - u8 i = 0; - u32 mask = ntohl(s->hmask); - if (mask) { - while (!(mask & 1)) { - i++; - mask>>=1; - } - } - n->fshift = i; -} + n->fshift = s->hmask ? 
ffs(ntohl(s->hmask)) - 1 : 0; #ifdef CONFIG_CLS_U32_MARK if (tb[TCA_U32_MARK-1]) { From 94e10bfb8a7372df3ef2759c9ec2a37de2f24aca Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Oct 2007 22:16:23 +0200 Subject: [PATCH 17/39] softmac: fix wext MLME request reason code endianness The MLME request reason code is host-endian and our passing it to the low level functions is host-endian as well since they do the swapping. I noticed that the reason code 768 was sent (0x300) rather than 3 when wpa_supplicant terminates. This removes the superfluous cpu_to_le16() call. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/ieee80211/softmac/ieee80211softmac_wx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index ac36767b56e8..e01b59aedc54 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -470,7 +470,7 @@ ieee80211softmac_wx_set_mlme(struct net_device *dev, { struct ieee80211softmac_device *mac = ieee80211_priv(dev); struct iw_mlme *mlme = (struct iw_mlme *)extra; - u16 reason = cpu_to_le16(mlme->reason_code); + u16 reason = mlme->reason_code; struct ieee80211softmac_network *net; int err = -EINVAL; From 8d8c90e3fd1f8895f6d48bdcb34ba69a1fe73616 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Sat, 27 Oct 2007 15:14:39 +0200 Subject: [PATCH 18/39] ssb: Fix initcall ordering ssb must init after PCI but before the ssb drivers. Signed-off-by: Michael Buesch Cc: Christian Casteyde Fixes-bug: #9219 Signed-off-by: John W. 
Linville --- drivers/ssb/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index c12a741b5574..fc1d589dc675 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -1147,7 +1147,10 @@ static int __init ssb_modinit(void) return err; } -subsys_initcall(ssb_modinit); +/* ssb must be initialized after PCI but before the ssb drivers. + * That means we must use some initcall between subsys_initcall + * and device_initcall. */ +fs_initcall(ssb_modinit); static void __exit ssb_modexit(void) { From 8a8f1c0437a77cce29c1cb6089f01f22a6d9ca6e Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Sun, 28 Oct 2007 13:07:54 +0100 Subject: [PATCH 19/39] rfkill: Register LED triggers before registering switch Registering the switch triggers a LED event, so we must register LED triggers before the switch. This has a potential to fix a crash, depending on how the device driver initializes the rfkill data structure. Signed-off-by: Michael Buesch Signed-off-by: Ivo van Doorn Signed-off-by: John W. 
Linville --- net/rfkill/rfkill.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 51d151c0e962..c6a9412fe5bd 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -388,19 +388,20 @@ int rfkill_register(struct rfkill *rfkill) if (!rfkill->toggle_radio) return -EINVAL; + snprintf(dev->bus_id, sizeof(dev->bus_id), + "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); + + rfkill_led_trigger_register(rfkill); + error = rfkill_add_switch(rfkill); if (error) return error; - snprintf(dev->bus_id, sizeof(dev->bus_id), - "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); - error = device_add(dev); if (error) { rfkill_remove_switch(rfkill); return error; } - rfkill_led_trigger_register(rfkill); return 0; } @@ -416,9 +417,9 @@ EXPORT_SYMBOL(rfkill_register); */ void rfkill_unregister(struct rfkill *rfkill) { - rfkill_led_trigger_unregister(rfkill); device_del(&rfkill->dev); rfkill_remove_switch(rfkill); + rfkill_led_trigger_unregister(rfkill); put_device(&rfkill->dev); } EXPORT_SYMBOL(rfkill_unregister); From ac71c691e6a5ce991fe221d3bdb0c972f617aa37 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 28 Oct 2007 14:17:44 +0100 Subject: [PATCH 20/39] mac80211: make simple rate control algorithm built-in Too frequently people do not have module autoloading enabled or fail to install the rate control module correctly, hence their hardware probing fails due to no rate control algorithm being available. This makes the 'simple' algorithm built into the mac80211 module unless EMBEDDED is enabled in which case it can be disabled (eg. if the wanted driver requires another rate control algorithm.) Signed-off-by: Johannes Berg Acked-by: Michael Buesch Signed-off-by: John W. 
Linville --- net/mac80211/Kconfig | 12 ++++++++++++ net/mac80211/Makefile | 3 ++- net/mac80211/ieee80211.c | 13 +++++++++++++ net/mac80211/ieee80211_rate.c | 13 +++++++++++-- net/mac80211/ieee80211_rate.h | 3 +++ net/mac80211/rc80211_simple.c | 25 +------------------------ 6 files changed, 42 insertions(+), 27 deletions(-) diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 6fffb3845ab6..32c8c08c4683 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -13,6 +13,18 @@ config MAC80211 This option enables the hardware independent IEEE 802.11 networking stack. +config MAC80211_RCSIMPLE + bool "'simple' rate control algorithm" + default y + depends on MAC80211 && EMBEDDED + help + This option allows you to turn off the 'simple' rate + control algorithm in mac80211. If you do turn it off, + you absolutely need another rate control algorithm. + + Say Y unless you know you will have another algorithm + available. + config MAC80211_LEDS bool "Enable LED triggers" depends on MAC80211 && LEDS_TRIGGERS diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 219cd9f9341f..1e6237b34846 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -1,8 +1,9 @@ -obj-$(CONFIG_MAC80211) += mac80211.o rc80211_simple.o +obj-$(CONFIG_MAC80211) += mac80211.o mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += debugfs.o debugfs_sta.o debugfs_netdev.o debugfs_key.o mac80211-objs-$(CONFIG_NET_SCHED) += wme.o +mac80211-objs-$(CONFIG_MAC80211_RCSIMPLE) += rc80211_simple.o mac80211-objs := \ ieee80211.o \ diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index f484ca7ade9c..52ba6ef90685 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -1233,8 +1233,17 @@ static int __init ieee80211_init(void) BUILD_BUG_ON(sizeof(struct ieee80211_tx_packet_data) > sizeof(skb->cb)); +#ifdef CONFIG_MAC80211_RCSIMPLE + ret = ieee80211_rate_control_register(&mac80211_rcsimple); + if (ret) + 
return ret; +#endif + ret = ieee80211_wme_register(); if (ret) { +#ifdef CONFIG_MAC80211_RCSIMPLE + ieee80211_rate_control_unregister(&mac80211_rcsimple); +#endif printk(KERN_DEBUG "ieee80211_init: failed to " "initialize WME (err=%d)\n", ret); return ret; @@ -1248,6 +1257,10 @@ static int __init ieee80211_init(void) static void __exit ieee80211_exit(void) { +#ifdef CONFIG_MAC80211_RCSIMPLE + ieee80211_rate_control_unregister(&mac80211_rcsimple); +#endif + ieee80211_wme_unregister(); ieee80211_debugfs_netdev_exit(); } diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c index 93abb8fff141..e4bd8481554d 100644 --- a/net/mac80211/ieee80211_rate.c +++ b/net/mac80211/ieee80211_rate.c @@ -25,6 +25,9 @@ int ieee80211_rate_control_register(struct rate_control_ops *ops) { struct rate_control_alg *alg; + if (!ops->name) + return -EINVAL; + alg = kzalloc(sizeof(*alg), GFP_KERNEL); if (alg == NULL) { return -ENOMEM; @@ -61,9 +64,12 @@ ieee80211_try_rate_control_ops_get(const char *name) struct rate_control_alg *alg; struct rate_control_ops *ops = NULL; + if (!name) + return NULL; + mutex_lock(&rate_ctrl_mutex); list_for_each_entry(alg, &rate_ctrl_algs, list) { - if (!name || !strcmp(alg->ops->name, name)) + if (!strcmp(alg->ops->name, name)) if (try_module_get(alg->ops->module)) { ops = alg->ops; break; @@ -80,9 +86,12 @@ ieee80211_rate_control_ops_get(const char *name) { struct rate_control_ops *ops; + if (!name) + name = "simple"; + ops = ieee80211_try_rate_control_ops_get(name); if (!ops) { - request_module("rc80211_%s", name ? 
name : "default"); + request_module("rc80211_%s", name); ops = ieee80211_try_rate_control_ops_get(name); } return ops; diff --git a/net/mac80211/ieee80211_rate.h b/net/mac80211/ieee80211_rate.h index 7cd1ebab4f83..23688139ffb3 100644 --- a/net/mac80211/ieee80211_rate.h +++ b/net/mac80211/ieee80211_rate.h @@ -65,6 +65,9 @@ struct rate_control_ref { struct kref kref; }; +/* default 'simple' algorithm */ +extern struct rate_control_ops mac80211_rcsimple; + int ieee80211_rate_control_register(struct rate_control_ops *ops); void ieee80211_rate_control_unregister(struct rate_control_ops *ops); diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c index 314b8de88862..da72737364e4 100644 --- a/net/mac80211/rc80211_simple.c +++ b/net/mac80211/rc80211_simple.c @@ -7,7 +7,6 @@ * published by the Free Software Foundation. */ -#include #include #include #include @@ -29,8 +28,6 @@ #define RATE_CONTROL_INTERVAL (HZ / 20) #define RATE_CONTROL_MIN_TX 10 -MODULE_ALIAS("rc80211_default"); - static void rate_control_rate_inc(struct ieee80211_local *local, struct sta_info *sta) { @@ -394,8 +391,7 @@ static void rate_control_simple_remove_sta_debugfs(void *priv, void *priv_sta) } #endif -static struct rate_control_ops rate_control_simple = { - .module = THIS_MODULE, +struct rate_control_ops mac80211_rcsimple = { .name = "simple", .tx_status = rate_control_simple_tx_status, .get_rate = rate_control_simple_get_rate, @@ -410,22 +406,3 @@ static struct rate_control_ops rate_control_simple = { .remove_sta_debugfs = rate_control_simple_remove_sta_debugfs, #endif }; - - -static int __init rate_control_simple_init(void) -{ - return ieee80211_rate_control_register(&rate_control_simple); -} - - -static void __exit rate_control_simple_exit(void) -{ - ieee80211_rate_control_unregister(&rate_control_simple); -} - - -subsys_initcall(rate_control_simple_init); -module_exit(rate_control_simple_exit); - -MODULE_DESCRIPTION("Simple rate control algorithm for ieee80211"); 
-MODULE_LICENSE("GPL"); From 2bf236d55e5ea2b92ed5235af09997c2995b316b Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Sun, 28 Oct 2007 14:39:02 +0100 Subject: [PATCH 21/39] rfkill: Use subsys_initcall We must use subsys_initcall, because we must initialize before a driver calls rfkill_register(). Signed-off-by: Michael Buesch Signed-off-by: Ivo van Doorn Signed-off-by: John W. Linville --- net/rfkill/rfkill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index c6a9412fe5bd..bbfbb9e7a917 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -449,5 +449,5 @@ static void __exit rfkill_exit(void) class_unregister(&rfkill_class); } -module_init(rfkill_init); +subsys_initcall(rfkill_init); module_exit(rfkill_exit); From 999acd9c339a761a18d625b13001612ac396ee00 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 28 Oct 2007 14:49:33 +0100 Subject: [PATCH 22/39] mac80211: don't allow registering the same rate control twice Previously, mac80211 would allow registering the same rate control algorithm twice. This is a programming error in the registration and should not happen; additionally the second version could never be selected. Disallow this and warn about it. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ieee80211_rate.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/mac80211/ieee80211_rate.c b/net/mac80211/ieee80211_rate.c index e4bd8481554d..7254bd609839 100644 --- a/net/mac80211/ieee80211_rate.c +++ b/net/mac80211/ieee80211_rate.c @@ -28,13 +28,22 @@ int ieee80211_rate_control_register(struct rate_control_ops *ops) if (!ops->name) return -EINVAL; + mutex_lock(&rate_ctrl_mutex); + list_for_each_entry(alg, &rate_ctrl_algs, list) { + if (!strcmp(alg->ops->name, ops->name)) { + /* don't register an algorithm twice */ + WARN_ON(1); + return -EALREADY; + } + } + alg = kzalloc(sizeof(*alg), GFP_KERNEL); if (alg == NULL) { + mutex_unlock(&rate_ctrl_mutex); return -ENOMEM; } alg->ops = ops; - mutex_lock(&rate_ctrl_mutex); list_add_tail(&alg->list, &rate_ctrl_algs); mutex_unlock(&rate_ctrl_mutex); From 830f903866a1611e9ce53f3e35202302bb938946 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 28 Oct 2007 14:51:05 +0100 Subject: [PATCH 23/39] mac80211: allow driver to ask for a rate control algorithm This allows a driver to ask for a specific rate control algorithm. The rate control algorithm asked for must be registered and be available as a module or built-in. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 5 +++++ net/mac80211/ieee80211.c | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5fcc4c104340..f13c378fde91 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -706,11 +706,16 @@ enum ieee80211_hw_flags { * * @queues: number of available hardware transmit queues for * data packets. WMM/QoS requires at least four. + * + * @rate_control_algorithm: rate control algorithm for this hardware. + * If unset (NULL), the default algorithm will be used. Must be + * set before calling ieee80211_register_hw(). 
*/ struct ieee80211_hw { struct ieee80211_conf conf; struct wiphy *wiphy; struct workqueue_struct *workqueue; + const char *rate_control_algorithm; void *priv; u32 flags; unsigned int extra_tx_headroom; diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 52ba6ef90685..e0ee65a969bc 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -1072,7 +1072,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_debugfs_add_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev)); ieee80211_if_set_type(local->mdev, IEEE80211_IF_TYPE_AP); - result = ieee80211_init_rate_ctrl_alg(local, NULL); + result = ieee80211_init_rate_ctrl_alg(local, + hw->rate_control_algorithm); if (result < 0) { printk(KERN_DEBUG "%s: Failed to initialize rate control " "algorithm\n", wiphy_name(local->hw.wiphy)); From f51359a8fb1bb00ae87051991e59d0f92d90604b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 28 Oct 2007 14:53:36 +0100 Subject: [PATCH 24/39] iwlwifi: select proper rate control algorithm Prior to this patch, iwlwifi would always use the first registered rate control algorithm which, depending on system setup, could be anything. After the mac80211 patch to make the simple algorithm built-in, it would always be simple. This has always been a bug in iwlwifi. This fixes it by requesting that mac80211 selects the right rate control algorithm. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl3945-base.c | 2 ++ drivers/net/wireless/iwlwifi/iwl4965-base.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 4f22a7174caf..be7c9f42a340 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -8354,6 +8354,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } SET_IEEE80211_DEV(hw, &pdev->dev); + hw->rate_control_algorithm = "iwl-3945-rs"; + IWL_DEBUG_INFO("*** LOAD DRIVER ***\n"); priv = hw->priv; priv->hw = hw; diff --git a/drivers/net/wireless/iwlwifi/iwl4965-base.c b/drivers/net/wireless/iwlwifi/iwl4965-base.c index d60adcb9bd4a..6757c6c1b25a 100644 --- a/drivers/net/wireless/iwlwifi/iwl4965-base.c +++ b/drivers/net/wireless/iwlwifi/iwl4965-base.c @@ -8955,6 +8955,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } SET_IEEE80211_DEV(hw, &pdev->dev); + hw->rate_control_algorithm = "iwl-4965-rs"; + IWL_DEBUG_INFO("*** LOAD DRIVER ***\n"); priv = hw->priv; priv->hw = hw; From 7319f1e6bcf04abd2eddb19747b0933a76f839ce Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Sun, 28 Oct 2007 15:16:50 +0100 Subject: [PATCH 25/39] rfkill: Use mutex_lock() at register and add sanity check Replace mutex_lock_interruptible() by mutex_lock() in rfkill_register(), as interruptible doesn't make sense there. Add a sanity check for rfkill->type, as that's used for an unchecked dereference in an array and might cause hard to debug crashes if the driver sets this to an invalid value. Signed-off-by: Michael Buesch Signed-off-by: Ivo van Doorn Signed-off-by: John W. 
Linville --- net/rfkill/rfkill.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index bbfbb9e7a917..47e8cd0322c0 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -276,21 +276,17 @@ static struct class rfkill_class = { static int rfkill_add_switch(struct rfkill *rfkill) { - int retval; + int error; - retval = mutex_lock_interruptible(&rfkill_mutex); - if (retval) - return retval; + mutex_lock(&rfkill_mutex); - retval = rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type]); - if (retval) - goto out; + error = rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type]); + if (!error) + list_add_tail(&rfkill->node, &rfkill_list); - list_add_tail(&rfkill->node, &rfkill_list); - - out: mutex_unlock(&rfkill_mutex); - return retval; + + return error; } static void rfkill_remove_switch(struct rfkill *rfkill) @@ -387,6 +383,8 @@ int rfkill_register(struct rfkill *rfkill) if (!rfkill->toggle_radio) return -EINVAL; + if (rfkill->type >= RFKILL_TYPE_MAX) + return -EINVAL; snprintf(dev->bus_id, sizeof(dev->bus_id), "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); From 2736622344e9af9801392edf9e733e8a8f6931d1 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Fri, 2 Nov 2007 20:18:11 +0100 Subject: [PATCH 26/39] rfkill: Fix sparse warning Signed-off-by: John W. Linville --- net/rfkill/rfkill.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 47e8cd0322c0..73d60a307129 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -27,6 +27,10 @@ #include #include +/* Get declaration of rfkill_switch_all() to shut up sparse. 
*/ +#include "rfkill-input.h" + + MODULE_AUTHOR("Ivo van Doorn "); MODULE_VERSION("1.0"); MODULE_DESCRIPTION("RF switch support"); From 6b345dd902a83b441d6a05e5541b84567aa1d6e4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 5 Nov 2007 14:32:35 +0100 Subject: [PATCH 27/39] softmac: MAINTAINERS update This patch marks softmac as obsolete in MAINTAINERS and removes Joe and myself as maintainers, we're no longer using it nor interested in the code in any way. Also remove the website reference because I took it offline. Hopefully the code will go away in 2.6.25. Signed-off-by: Johannes Berg Acked-by: Joseph Jezak Signed-off-by: John W. Linville --- MAINTAINERS | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6a9702726239..cad0882754a6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3454,15 +3454,10 @@ L: lm-sensors@lm-sensors.org S: Maintained SOFTMAC LAYER (IEEE 802.11) -P: Johannes Berg -M: johannes@sipsolutions.net -P: Joe Jezak -M: josejx@gentoo.org P: Daniel Drake M: dsd@gentoo.org -W: http://softmac.sipsolutions.net/ L: linux-wireless@vger.kernel.org -S: Maintained +S: Obsolete SOFTWARE RAID (Multiple Disks) SUPPORT P: Ingo Molnar From 8636bf6513d8eae228c049adeac29fe6cd0739f1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Nov 2007 11:23:29 +0100 Subject: [PATCH 28/39] mac80211: remove ieee80211_common.h Robert pointed out that I missed this file when removing the management interface. Do it now. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ieee80211_common.h | 91 --------------------------------- 1 file changed, 91 deletions(-) delete mode 100644 net/mac80211/ieee80211_common.h diff --git a/net/mac80211/ieee80211_common.h b/net/mac80211/ieee80211_common.h deleted file mode 100644 index c15295d43d87..000000000000 --- a/net/mac80211/ieee80211_common.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * IEEE 802.11 driver (80211.o) -- hostapd interface - * Copyright 2002-2004, Instant802 Networks, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef IEEE80211_COMMON_H -#define IEEE80211_COMMON_H - -#include - -/* - * This is common header information with user space. It is used on all - * frames sent to wlan#ap interface. - */ - -#define IEEE80211_FI_VERSION 0x80211001 - -struct ieee80211_frame_info { - __be32 version; - __be32 length; - __be64 mactime; - __be64 hosttime; - __be32 phytype; - __be32 channel; - __be32 datarate; - __be32 antenna; - __be32 priority; - __be32 ssi_type; - __be32 ssi_signal; - __be32 ssi_noise; - __be32 preamble; - __be32 encoding; - - /* Note: this structure is otherwise identical to capture format used - * in linux-wlan-ng, but this additional field is used to provide meta - * data about the frame to hostapd. This was the easiest method for - * providing this information, but this might change in the future. 
*/ - __be32 msg_type; -} __attribute__ ((packed)); - - -enum ieee80211_msg_type { - ieee80211_msg_normal = 0, - ieee80211_msg_tx_callback_ack = 1, - ieee80211_msg_tx_callback_fail = 2, - /* hole at 3, was ieee80211_msg_passive_scan but unused */ - /* hole at 4, was ieee80211_msg_wep_frame_unknown_key but now unused */ - ieee80211_msg_michael_mic_failure = 5, - /* hole at 6, was monitor but never sent to userspace */ - ieee80211_msg_sta_not_assoc = 7, - /* 8 was ieee80211_msg_set_aid_for_sta */ - /* 9 was ieee80211_msg_key_threshold_notification */ - /* 11 was ieee80211_msg_radar */ -}; - -struct ieee80211_msg_key_notification { - int tx_rx_count; - char ifname[IFNAMSIZ]; - u8 addr[ETH_ALEN]; /* ff:ff:ff:ff:ff:ff for broadcast keys */ -}; - - -enum ieee80211_phytype { - ieee80211_phytype_fhss_dot11_97 = 1, - ieee80211_phytype_dsss_dot11_97 = 2, - ieee80211_phytype_irbaseband = 3, - ieee80211_phytype_dsss_dot11_b = 4, - ieee80211_phytype_pbcc_dot11_b = 5, - ieee80211_phytype_ofdm_dot11_g = 6, - ieee80211_phytype_pbcc_dot11_g = 7, - ieee80211_phytype_ofdm_dot11_a = 8, -}; - -enum ieee80211_ssi_type { - ieee80211_ssi_none = 0, - ieee80211_ssi_norm = 1, /* normalized, 0-1000 */ - ieee80211_ssi_dbm = 2, - ieee80211_ssi_raw = 3, /* raw SSI */ -}; - -struct ieee80211_radar_info { - int channel; - int radar; - int radar_type; -}; - -#endif /* IEEE80211_COMMON_H */ From 56db6c52bb61509c114b9f1b1eecc7461229770a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Oct 2007 15:58:18 +0100 Subject: [PATCH 29/39] mac80211: remove unused driver ops The driver operations set_ieee8021x(), set_port_auth() and set_privacy_invoked() are not used by any drivers, except set_privacy_invoked() they aren't even used by mac80211. Remove them at least until we need to support drivers with mac80211 that require getting this information. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 21 --------------------- net/mac80211/ieee80211_ioctl.c | 7 +------ 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f13c378fde91..17b60391fcd6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -941,27 +941,11 @@ enum ieee80211_erp_change_flags { * and remove_interface calls, i.e. while the interface with the * given local_address is enabled. * - * @set_ieee8021x: Enable/disable IEEE 802.1X. This item requests wlan card - * to pass unencrypted EAPOL-Key frames even when encryption is - * configured. If the wlan card does not require such a configuration, - * this function pointer can be set to NULL. - * - * @set_port_auth: Set port authorization state (IEEE 802.1X PAE) to be - * authorized (@authorized=1) or unauthorized (=0). This function can be - * used if the wlan hardware or low-level driver implements PAE. - * mac80211 will filter frames based on authorization state in any case, - * so this function pointer can be NULL if low-level driver does not - * require event notification about port state changes. - * * @hw_scan: Ask the hardware to service the scan request, no need to start * the scan state machine in stack. * * @get_stats: return low-level statistics * - * @set_privacy_invoked: For devices that generate their own beacons and probe - * response or association responses this updates the state of privacy_invoked - * returns 0 for success or an error number. - * * @get_sequence_counter: For devices that have internal sequence counters this * callback allows mac80211 to access the current value of a counter. * This callback seems not well-defined, tell us if you need it. 
@@ -1034,14 +1018,9 @@ struct ieee80211_ops { int (*set_key)(struct ieee80211_hw *hw, enum set_key_cmd cmd, const u8 *local_address, const u8 *address, struct ieee80211_key_conf *key); - int (*set_ieee8021x)(struct ieee80211_hw *hw, int use_ieee8021x); - int (*set_port_auth)(struct ieee80211_hw *hw, u8 *addr, - int authorized); int (*hw_scan)(struct ieee80211_hw *hw, u8 *ssid, size_t len); int (*get_stats)(struct ieee80211_hw *hw, struct ieee80211_low_level_stats *stats); - int (*set_privacy_invoked)(struct ieee80211_hw *hw, - int privacy_invoked); int (*get_sequence_counter)(struct ieee80211_hw *hw, u8* addr, u8 keyidx, u8 txrx, u32* iv32, u16* iv16); diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 6caa3ec2cff7..3645660a364a 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -917,7 +917,6 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, struct iw_request_info *info, struct iw_param *data, char *extra) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int ret = 0; @@ -927,6 +926,7 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, case IW_AUTH_CIPHER_GROUP: case IW_AUTH_WPA_ENABLED: case IW_AUTH_RX_UNENCRYPTED_EAPOL: + case IW_AUTH_PRIVACY_INVOKED: break; case IW_AUTH_KEY_MGMT: if (sdata->type != IEEE80211_IF_TYPE_STA) @@ -948,11 +948,6 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, else ret = -EOPNOTSUPP; break; - case IW_AUTH_PRIVACY_INVOKED: - if (local->ops->set_privacy_invoked) - ret = local->ops->set_privacy_invoked( - local_to_hw(local), data->value); - break; default: ret = -EOPNOTSUPP; break; From 5b98b1f7daf6d52ccc446486aca0b8bc9a588b2c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 3 Nov 2007 13:11:10 +0000 Subject: [PATCH 30/39] mac80211: use IW_AUTH_PRIVACY_INVOKED rather than IW_AUTH_KEY_MGMT In the long bug-hunt for why dynamic WEP networks didn't work 
it turned out that mac80211 incorrectly uses IW_AUTH_KEY_MGMT while it should use IW_AUTH_PRIVACY_INVOKED to determine whether to associate to protected networks or not. This patch changes the behaviour to be that way and clarifies the existing code. Signed-off-by: Johannes Berg Cc: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/ieee80211_ioctl.c | 16 +++++++++------- net/mac80211/ieee80211_sta.c | 18 +++++++++++------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4b4ed2a5803c..b4e32ab3664d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -230,6 +230,7 @@ struct ieee80211_if_vlan { #define IEEE80211_STA_AUTO_SSID_SEL BIT(10) #define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) #define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) +#define IEEE80211_STA_PRIVACY_INVOKED BIT(13) struct ieee80211_if_sta { enum { IEEE80211_DISABLED, IEEE80211_AUTHENTICATE, @@ -259,7 +260,6 @@ struct ieee80211_if_sta { unsigned long request; struct sk_buff_head skb_queue; - int key_management_enabled; unsigned long last_probe; #define IEEE80211_AUTH_ALG_OPEN BIT(0) diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 3645660a364a..7027eed4d4ae 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -926,19 +926,21 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, case IW_AUTH_CIPHER_GROUP: case IW_AUTH_WPA_ENABLED: case IW_AUTH_RX_UNENCRYPTED_EAPOL: - case IW_AUTH_PRIVACY_INVOKED: - break; case IW_AUTH_KEY_MGMT: + break; + case IW_AUTH_PRIVACY_INVOKED: if (sdata->type != IEEE80211_IF_TYPE_STA) ret = -EINVAL; else { + sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; /* - * Key management was set by wpa_supplicant, - * we only need this to associate to a network - * that has privacy enabled regardless of not - * having a key. 
+ * Privacy invoked by wpa_supplicant, store the + * value and allow associating to a protected + * network without having a key up front. */ - sdata->u.sta.key_management_enabled = !!data->value; + if (data->value) + sdata->u.sta.flags |= + IEEE80211_STA_PRIVACY_INVOKED; } break; case IW_AUTH_80211_AUTH_ALG: diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index fda0e06453e8..2079e988fc56 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -704,10 +704,11 @@ static int ieee80211_privacy_mismatch(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sta_bss *bss; - int res = 0; + int bss_privacy; + int wep_privacy; + int privacy_invoked; - if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL) || - ifsta->key_management_enabled) + if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL)) return 0; bss = ieee80211_rx_bss_get(dev, ifsta->bssid, local->hw.conf.channel, @@ -715,13 +716,16 @@ static int ieee80211_privacy_mismatch(struct net_device *dev, if (!bss) return 0; - if (ieee80211_sta_wep_configured(dev) != - !!(bss->capability & WLAN_CAPABILITY_PRIVACY)) - res = 1; + bss_privacy = !!(bss->capability & WLAN_CAPABILITY_PRIVACY); + wep_privacy = !!ieee80211_sta_wep_configured(dev); + privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED); ieee80211_rx_bss_put(dev, bss); - return res; + if ((bss_privacy == wep_privacy) || (bss_privacy == privacy_invoked)) + return 0; + + return 1; } From 7f3ad8943e2e6336ba229b208be8c7a80730c5d4 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 6 Nov 2007 17:12:31 -0500 Subject: [PATCH 31/39] mac80211: make "decrypt failed" messages conditional upon MAC80211_DEBUG Make "decrypt failed" and "have no key" debugging messages compile conditionally upon CONFIG_MAC80211_DEBUG. They have been useful for finding certain problems in the past, but in many cases they just clutter a user's logs. 
A typical example is an environment where multiple SSIDs are using a single BSSID but with different protection schemes or different keys for each SSID. In such an environment these messages are just noise. Let's just leave them for those interested enough to turn-on debugging. Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 ++ net/mac80211/wep.c | 2 ++ net/mac80211/wpa.c | 18 ++++++++++++------ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ece77766ea2b..428a9fcf57d6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -509,9 +509,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_txrx_data *rx) rx->key->tx_rx_count++; /* TODO: add threshold stuff again */ } else { +#ifdef CONFIG_MAC80211_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "%s: RX protected frame," " but have no key\n", rx->dev->name); +#endif /* CONFIG_MAC80211_DEBUG */ return TXRX_DROP; } diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index a84a23310ff4..9bf0e1cc530a 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -314,9 +314,11 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_txrx_data *rx) if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED)) { if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) { +#ifdef CONFIG_MAC80211_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "%s: RX WEP frame, decrypt " "failed\n", rx->dev->name); +#endif /* CONFIG_MAC80211_DEBUG */ return TXRX_DROP; } } else if (!(rx->u.rx.status->flag & RX_FLAG_IV_STRIPPED)) { diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 6695efba57ec..20cec1cb956f 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -323,9 +323,12 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_txrx_data *rx) &rx->u.rx.tkip_iv32, &rx->u.rx.tkip_iv16); if (res != TKIP_DECRYPT_OK || wpa_test) { - printk(KERN_DEBUG "%s: TKIP decrypt failed for RX frame from " - "%s (res=%d)\n", - rx->dev->name, print_mac(mac, rx->sta->addr), res); +#ifdef CONFIG_MAC80211_DEBUG + if
(net_ratelimit()) + printk(KERN_DEBUG "%s: TKIP decrypt failed for RX " + "frame from %s (res=%d)\n", rx->dev->name, + print_mac(mac, rx->sta->addr), res); +#endif /* CONFIG_MAC80211_DEBUG */ return TXRX_DROP; } @@ -594,9 +597,12 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_txrx_data *rx) skb->data + hdrlen + CCMP_HDR_LEN, data_len, skb->data + skb->len - CCMP_MIC_LEN, skb->data + hdrlen + CCMP_HDR_LEN)) { - printk(KERN_DEBUG "%s: CCMP decrypt failed for RX " - "frame from %s\n", rx->dev->name, - print_mac(mac, rx->sta->addr)); +#ifdef CONFIG_MAC80211_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "%s: CCMP decrypt failed " + "for RX frame from %s\n", rx->dev->name, + print_mac(mac, rx->sta->addr)); +#endif /* CONFIG_MAC80211_DEBUG */ return TXRX_DROP; } } From d52a60ad389d8aeac162350b19e4303c6cde7f93 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Nov 2007 16:41:13 +0100 Subject: [PATCH 32/39] mac80211: fix MAC80211_RCSIMPLE Kconfig I meant for this to be selectable only with EMBEDDED, not enabled only with EMBEDDED. This does it that way. Sorry. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 32c8c08c4683..ce176e691afe 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -14,9 +14,9 @@ config MAC80211 networking stack. config MAC80211_RCSIMPLE - bool "'simple' rate control algorithm" + bool "'simple' rate control algorithm" if EMBEDDED default y - depends on MAC80211 && EMBEDDED + depends on MAC80211 help This option allows you to turn off the 'simple' rate control algorithm in mac80211. If you do turn it off, From 60d78c4473493674531a1df0772ca9e4d6133a62 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 7 Nov 2007 19:03:35 +0100 Subject: [PATCH 33/39] ssb: Fix PCMCIA-host lowlevel bus access This fixes the lowlevel bus access routines for PCMCIA based devices. 
There are still a few issues with register access side effects after this patch. This will be addressed in a later patch. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/ssb/main.c | 1 + drivers/ssb/pcmcia.c | 56 ++++++++++++++++++++++---------------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c index fc1d589dc675..85a20546e827 100644 --- a/drivers/ssb/main.c +++ b/drivers/ssb/main.c @@ -440,6 +440,7 @@ static int ssb_devices_register(struct ssb_bus *bus) break; case SSB_BUSTYPE_PCMCIA: #ifdef CONFIG_SSB_PCMCIAHOST + sdev->irq = bus->host_pcmcia->irq.AssignedIRQ; dev->parent = &bus->host_pcmcia->dev; #endif break; diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c index b6abee846f02..bb44a76b3eb5 100644 --- a/drivers/ssb/pcmcia.c +++ b/drivers/ssb/pcmcia.c @@ -63,17 +63,17 @@ int ssb_pcmcia_switch_coreidx(struct ssb_bus *bus, err = pcmcia_access_configuration_register(pdev, &reg); if (err != CS_SUCCESS) goto error; - read_addr |= (reg.Value & 0xF) << 12; + read_addr |= ((u32)(reg.Value & 0x0F)) << 12; reg.Offset = 0x30; err = pcmcia_access_configuration_register(pdev, &reg); if (err != CS_SUCCESS) goto error; - read_addr |= reg.Value << 16; + read_addr |= ((u32)reg.Value) << 16; reg.Offset = 0x32; err = pcmcia_access_configuration_register(pdev, &reg); if (err != CS_SUCCESS) goto error; - read_addr |= reg.Value << 24; + read_addr |= ((u32)reg.Value) << 24; cur_core = (read_addr - SSB_ENUM_BASE) / SSB_CORE_SIZE; if (cur_core == coreidx) @@ -152,28 +152,29 @@ error: goto out_unlock; } -/* These are the main device register access functions. - * do_select_core is inline to have the likely hotpath inline. - * All unlikely codepaths are out-of-line.
*/ -static inline int do_select_core(struct ssb_bus *bus, - struct ssb_device *dev, - u16 *offset) +static int select_core_and_segment(struct ssb_device *dev, + u16 *offset) { + struct ssb_bus *bus = dev->bus; int err; - u8 need_seg = (*offset >= 0x800) ? 1 : 0; + u8 need_segment; + + if (*offset >= 0x800) { + *offset -= 0x800; + need_segment = 1; + } else + need_segment = 0; if (unlikely(dev != bus->mapped_device)) { err = ssb_pcmcia_switch_core(bus, dev); if (unlikely(err)) return err; } - if (unlikely(need_seg != bus->mapped_pcmcia_seg)) { - err = ssb_pcmcia_switch_segment(bus, need_seg); + if (unlikely(need_segment != bus->mapped_pcmcia_seg)) { + err = ssb_pcmcia_switch_segment(bus, need_segment); if (unlikely(err)) return err; } - if (need_seg == 1) - *offset -= 0x800; return 0; } @@ -181,32 +182,31 @@ static inline int do_select_core(struct ssb_bus *bus, static u16 ssb_pcmcia_read16(struct ssb_device *dev, u16 offset) { struct ssb_bus *bus = dev->bus; - u16 x; - if (unlikely(do_select_core(bus, dev, &offset))) + if (unlikely(select_core_and_segment(dev, &offset))) return 0xFFFF; - x = readw(bus->mmio + offset); - return x; + return readw(bus->mmio + offset); } static u32 ssb_pcmcia_read32(struct ssb_device *dev, u16 offset) { struct ssb_bus *bus = dev->bus; - u32 x; + u32 lo, hi; - if (unlikely(do_select_core(bus, dev, &offset))) + if (unlikely(select_core_and_segment(dev, &offset))) return 0xFFFFFFFF; - x = readl(bus->mmio + offset); + lo = readw(bus->mmio + offset); + hi = readw(bus->mmio + offset + 2); - return x; + return (lo | (hi << 16)); } static void ssb_pcmcia_write16(struct ssb_device *dev, u16 offset, u16 value) { struct ssb_bus *bus = dev->bus; - if (unlikely(do_select_core(bus, dev, &offset))) + if (unlikely(select_core_and_segment(dev, &offset))) return; writew(value, bus->mmio + offset); } @@ -215,12 +215,12 @@ static void ssb_pcmcia_write32(struct ssb_device *dev, u16 offset, u32 value) { struct ssb_bus *bus = dev->bus; - if 
(unlikely(do_select_core(bus, dev, &offset))) + if (unlikely(select_core_and_segment(dev, &offset))) return; - readw(bus->mmio + offset); - writew(value >> 16, bus->mmio + offset + 2); - readw(bus->mmio + offset); - writew(value, bus->mmio + offset); + writeb((value & 0xFF000000) >> 24, bus->mmio + offset + 3); + writeb((value & 0x00FF0000) >> 16, bus->mmio + offset + 2); + writeb((value & 0x0000FF00) >> 8, bus->mmio + offset + 1); + writeb((value & 0x000000FF) >> 0, bus->mmio + offset + 0); } /* Not "static", as it's used in main.c */ From 8032b46489e50ef8f3992159abd0349b5b8e476c Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Sat, 10 Nov 2007 22:03:25 -0800 Subject: [PATCH 34/39] [AF_PACKET]: Allow multicast traffic to be caught by ORIGDEV when bonded The socket option for packet sockets to return the original ifindex instead of the bonded ifindex will not match multicast traffic. Since this socket option is the most useful for layer 2 traffic and multicast traffic, make the option multicast-aware. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 36331a5f0abe..eb6be5030c70 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -509,7 +509,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST) + if (unlikely(po->origdev)) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -655,7 +655,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST) + if (unlikely(po->origdev)) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; From 9305cfa4443dbfb99faf35c5603ec0c0e91b5ef8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 10 Nov 2007 22:06:01 -0800 Subject: [PATCH 35/39] [AF_UNIX]: Make unix_tot_inflight counter non-atomic This counter is _always_ modified under the unix_gc_lock spinlock, so its atomicity can be provided w/o additional efforts. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 2 +- net/unix/garbage.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 0864a775de24..a1c805d7f488 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -12,7 +12,7 @@ extern void unix_gc(void); #define UNIX_HASH_SIZE 256 -extern atomic_t unix_tot_inflight; +extern unsigned int unix_tot_inflight; struct unix_address { atomic_t refcnt; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 515e7a692f9b..ab9048ac197f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -457,7 +457,7 @@ static int unix_release_sock (struct sock *sk, int embrion) * What the above comment does talk about? --ANK(980817) */ - if (atomic_read(&unix_tot_inflight)) + if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ return 0; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 406b6433e467..399717ed7b9d 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -92,7 +92,7 @@ static LIST_HEAD(gc_inflight_list); static LIST_HEAD(gc_candidates); static DEFINE_SPINLOCK(unix_gc_lock); -atomic_t unix_tot_inflight = ATOMIC_INIT(0); +unsigned int unix_tot_inflight; static struct sock *unix_get_socket(struct file *filp) @@ -133,7 +133,7 @@ void unix_inflight(struct file *fp) } else { BUG_ON(list_empty(&u->link)); } - atomic_inc(&unix_tot_inflight); + unix_tot_inflight++; spin_unlock(&unix_gc_lock); } } @@ -147,7 +147,7 @@ void unix_notinflight(struct file *fp) BUG_ON(list_empty(&u->link)); if (atomic_dec_and_test(&u->inflight)) list_del_init(&u->link); - atomic_dec(&unix_tot_inflight); + unix_tot_inflight--; spin_unlock(&unix_gc_lock); } } From 5c80f1ae9842a8b7985acd0f02efb9828effb05f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 10 Nov 2007 22:07:13 -0800 Subject: [PATCH 36/39] [AF_UNIX]: Convert socks to unix_socks in scan_inflight, not in callbacks The scan_inflight() routine scans through the unix sockets 
and calls some passed callback. The fact is that all these callbacks work with the unix_sock objects, not the sock ones, so make this conversion in the scan_inflight() before calling the callbacks. This removes one unneeded variable from the inc_inflight_move_tail(). Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/unix/garbage.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 399717ed7b9d..ebdff3d877a1 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -161,7 +161,7 @@ static inline struct sk_buff *sock_queue_head(struct sock *sk) for (skb = sock_queue_head(sk)->next, next = skb->next; \ skb != sock_queue_head(sk); skb = next, next = skb->next) -static void scan_inflight(struct sock *x, void (*func)(struct sock *), +static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff_head *hitlist) { struct sk_buff *skb; @@ -185,9 +185,9 @@ static void scan_inflight(struct sock *x, void (*func)(struct sock *), * if it indeed does so */ struct sock *sk = unix_get_socket(*fp++); - if(sk) { + if (sk) { hit = true; - func(sk); + func(unix_sk(sk)); } } if (hit && hitlist != NULL) { @@ -199,7 +199,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct sock *), spin_unlock(&x->sk_receive_queue.lock); } -static void scan_children(struct sock *x, void (*func)(struct sock *), +static void scan_children(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff_head *hitlist) { if (x->sk_state != TCP_LISTEN) @@ -235,20 +235,18 @@ static void scan_children(struct sock *x, void (*func)(struct sock *), } } -static void dec_inflight(struct sock *sk) +static void dec_inflight(struct unix_sock *usk) { - atomic_dec(&unix_sk(sk)->inflight); + atomic_dec(&usk->inflight); } -static void inc_inflight(struct sock *sk) +static void inc_inflight(struct unix_sock *usk) { - atomic_inc(&unix_sk(sk)->inflight); + 
atomic_inc(&usk->inflight); } -static void inc_inflight_move_tail(struct sock *sk) +static void inc_inflight_move_tail(struct unix_sock *u) { - struct unix_sock *u = unix_sk(sk); - atomic_inc(&u->inflight); /* * If this is still a candidate, move it to the end of the From 284b327be2f86cf751316ff344b6945e580e654f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 10 Nov 2007 22:08:30 -0800 Subject: [PATCH 37/39] [UNIX]: The unix_nr_socks limit can be exceeded The unix_nr_socks value is limited with the 2 * get_max_files() value, as seen from the unix_create1(). However, the check and the actual increment are separated with the GFP_KERNEL allocation, so this limit can be exceeded under a memory pressure - task may go to sleep freeing the pages and some other task will be allowed to allocate a new sock and so on and so forth. So make the increment before the check (similar thing is done in the sock_kmalloc) and go to kmalloc after this. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ab9048ac197f..e835da8fc091 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -599,15 +599,14 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) struct sock *sk = NULL; struct unix_sock *u; - if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) + atomic_inc(&unix_nr_socks); + if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) goto out; sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); if (!sk) goto out; - atomic_inc(&unix_nr_socks); - sock_init_data(sock,sk); lockdep_set_class(&sk->sk_receive_queue.lock, &af_unix_sk_receive_queue_lock_key); @@ -625,6 +624,8 @@ static struct sock * unix_create1(struct net *net, struct socket *sock) init_waitqueue_head(&u->peer_wait); unix_insert_socket(unix_sockets_unbound, sk); out: + if (sk == NULL) + atomic_dec(&unix_nr_socks); return sk; } From 33d36bb83c5b566c98a441e791736e25dbc35fc3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 10 Nov 2007 22:09:25 -0800 Subject: [PATCH 38/39] [NETNS]: init dev_base_lock only once * it already statically initialized * reinitializing live global spinlock every time netns is setup is also wrong Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index be6cedab5aa8..dd7e30754cbc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4330,7 +4330,6 @@ static struct hlist_head *netdev_create_hash(void) static int __net_init netdev_init(struct net *net) { INIT_LIST_HEAD(&net->dev_base_head); - rwlock_init(&dev_base_lock); net->dev_name_head = netdev_create_hash(); if (net->dev_name_head == NULL) From 2994c63863ac350c4c8c6a65d8110749c2abb95c Mon Sep 17 00:00:00 2001 From: "Denis V. 
Lunev" Date: Sat, 10 Nov 2007 22:12:03 -0800 Subject: [PATCH 39/39] [INET]: Small possible memory leak in FIB rules This patch fixes a small memory leak. Default fib rules can be deleted by the user if the rule does not carry FIB_RULE_PERMANENT flag, f.e. by ip rule flush Such a rule will not be freed as the ref-counter has 2 on start and becomes clearly unreachable after removal. Signed-off-by: Denis V. Lunev Acked-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- include/net/fib_rules.h | 3 +++ net/core/fib_rules.c | 22 ++++++++++++++++++ net/decnet/dn_rules.c | 13 ++--------- net/ipv4/fib_rules.c | 51 ++++++++++++++++------------------------- net/ipv6/fib6_rules.c | 37 ++++++++++++------------------ 5 files changed, 62 insertions(+), 64 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 017aebd90683..41a301e38643 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -107,4 +107,7 @@ extern int fib_rules_unregister(struct fib_rules_ops *); extern int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); +extern int fib_default_rule_add(struct fib_rules_ops *, + u32 pref, u32 table, + u32 flags); #endif diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 13de6f53f098..848132b6cb73 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -18,6 +18,28 @@ static LIST_HEAD(rules_ops); static DEFINE_SPINLOCK(rules_mod_lock); +int fib_default_rule_add(struct fib_rules_ops *ops, + u32 pref, u32 table, u32 flags) +{ + struct fib_rule *r; + + r = kzalloc(ops->rule_size, GFP_KERNEL); + if (r == NULL) + return -ENOMEM; + + atomic_set(&r->refcnt, 1); + r->action = FR_ACT_TO_TBL; + r->pref = pref; + r->table = table; + r->flags = flags; + + /* The lock is not required here, the list in unreacheable + * at the moment this function is called */ + list_add_tail(&r->list, &ops->rules_list); + return 0; +} +EXPORT_SYMBOL(fib_default_rule_add); + static void 
notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index ddd3f04f0919..ffebea04cc99 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -48,15 +48,6 @@ struct dn_fib_rule u8 flags; }; -static struct dn_fib_rule default_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7fff, - .table = RT_TABLE_MAIN, - .action = FR_ACT_TO_TBL, - }, -}; - int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) { @@ -262,8 +253,8 @@ static struct fib_rules_ops dn_fib_rules_ops = { void __init dn_fib_rules_init(void) { - list_add_tail(&default_rule.common.list, - &dn_fib_rules_ops.rules_list); + BUG_ON(fib_default_rule_add(&dn_fib_rules_ops, 0x7fff, + RT_TABLE_MAIN, 0)); fib_rules_register(&dn_fib_rules_ops); } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index f16839c6a721..a0ada3a8d8dd 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -49,33 +49,6 @@ struct fib4_rule #endif }; -static struct fib4_rule default_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFF, - .table = RT_TABLE_DEFAULT, - .action = FR_ACT_TO_TBL, - }, -}; - -static struct fib4_rule main_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFE, - .table = RT_TABLE_MAIN, - .action = FR_ACT_TO_TBL, - }, -}; - -static struct fib4_rule local_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .table = RT_TABLE_LOCAL, - .action = FR_ACT_TO_TBL, - .flags = FIB_RULE_PERMANENT, - }, -}; - #ifdef CONFIG_NET_CLS_ROUTE u32 fib_rules_tclass(struct fib_result *res) { @@ -319,11 +292,27 @@ static struct fib_rules_ops fib4_rules_ops = { .owner = THIS_MODULE, }; +static int __init fib_default_rules_init(void) +{ + int err; + + err = fib_default_rule_add(&fib4_rules_ops, 0, + RT_TABLE_LOCAL, FIB_RULE_PERMANENT); + if (err < 0) + return err; + err = fib_default_rule_add(&fib4_rules_ops, 0x7FFE, + RT_TABLE_MAIN, 0); + if (err < 0) 
+ return err; + err = fib_default_rule_add(&fib4_rules_ops, 0x7FFF, + RT_TABLE_DEFAULT, 0); + if (err < 0) + return err; + return 0; +} + void __init fib4_rules_init(void) { - list_add_tail(&local_rule.common.list, &fib4_rules_ops.rules_list); - list_add_tail(&main_rule.common.list, &fib4_rules_ops.rules_list); - list_add_tail(&default_rule.common.list, &fib4_rules_ops.rules_list); - + BUG_ON(fib_default_rules_init()); fib_rules_register(&fib4_rules_ops); } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 706622af206f..428c6b0e26d8 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -31,25 +31,6 @@ struct fib6_rule static struct fib_rules_ops fib6_rules_ops; -static struct fib6_rule main_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFE, - .action = FR_ACT_TO_TBL, - .table = RT6_TABLE_MAIN, - }, -}; - -static struct fib6_rule local_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0, - .action = FR_ACT_TO_TBL, - .table = RT6_TABLE_LOCAL, - .flags = FIB_RULE_PERMANENT, - }, -}; - struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, pol_lookup_t lookup) { @@ -270,11 +251,23 @@ static struct fib_rules_ops fib6_rules_ops = { .owner = THIS_MODULE, }; +static int __init fib6_default_rules_init(void) +{ + int err; + + err = fib_default_rule_add(&fib6_rules_ops, 0, + RT6_TABLE_LOCAL, FIB_RULE_PERMANENT); + if (err < 0) + return err; + err = fib_default_rule_add(&fib6_rules_ops, 0x7FFE, RT6_TABLE_MAIN, 0); + if (err < 0) + return err; + return 0; +} + void __init fib6_rules_init(void) { - list_add_tail(&local_rule.common.list, &fib6_rules_ops.rules_list); - list_add_tail(&main_rule.common.list, &fib6_rules_ops.rules_list); - + BUG_ON(fib6_default_rules_init()); fib_rules_register(&fib6_rules_ops); }