summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-04-30 08:45:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-30 08:45:48 -0700
commit95dfec6ae1cb8c03406aac612a5642cbddb676b3 (patch)
tree978de715f45de94a8e79eb08a08ca5fb9dfd9dea /net/ipv4
parentae3a0064e6d69068b1c9fd075095da062430bda9 (diff)
parent159131149c2f56c1da5ae5e23ab9d5acef4916d1 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (53 commits) tcp: Overflow bug in Vegas [IPv4] UFO: prevent generation of chained skb destined to UFO device iwlwifi: move the selects to the tristate drivers ipv4: annotate a few functions __init in ipconfig.c atm: ambassador: vcc_sf semaphore to mutex MAINTAINERS: The socketcan-core list is subscribers-only. netfilter: nf_conntrack: padding breaks conntrack hash on ARM ipv4: Update MTU to all related cache entries in ip_rt_frag_needed() sch_sfq: use del_timer_sync() in sfq_destroy() net: Add compat support for getsockopt (MCAST_MSFILTER) net: Several cleanups for the setsockopt compat support. ipvs: fix oops in backup for fwmark conn templates bridge: kernel panic when unloading bridge module bridge: fix error handling in br_add_if() netfilter: {nfnetlink,ip,ip6}_queue: fix skb_over_panic when enlarging packets netfilter: x_tables: fix net namespace leak when reading /proc/net/xxx_tables_names netfilter: xt_TCPOPTSTRIP: signed tcphoff for ipv6_skip_exthdr() retval tcp: Limit cwnd growth when deferring for GSO tcp: Allow send-limited cwnd to grow up to max_burst when gso disabled [netdrvr] gianfar: Determine TBIPA value dynamically ...
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/ip_output.c22
-rw-r--r--net/ipv4/ip_sockglue.c9
-rw-r--r--net/ipv4/ipconfig.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_proto.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_ah.c1
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_esp.c1
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c1
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c1
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c80
-rw-r--r--net/ipv4/netfilter/ip_queue.c5
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c2
-rw-r--r--net/ipv4/route.c38
-rw-r--r--net/ipv4/tcp_cong.c12
-rw-r--r--net/ipv4/tcp_vegas.c10
-rw-r--r--net/ipv4/tcp_veno.c8
16 files changed, 126 insertions, 77 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c67d00e8c600..87397351ddac 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -691,7 +691,8 @@ static void icmp_unreach(struct sk_buff *skb)
NIPQUAD(iph->daddr));
} else {
info = ip_rt_frag_needed(net, iph,
- ntohs(icmph->un.frag.mtu));
+ ntohs(icmph->un.frag.mtu),
+ skb->dev);
if (!info)
goto out;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 08349267ceb4..e527628f56cf 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -753,23 +753,15 @@ static inline int ip_ufo_append_data(struct sock *sk,
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
sk->sk_sndmsg_off = 0;
- }
- err = skb_append_datato_frags(sk,skb, getfrag, from,
- (length - transhdrlen));
- if (!err) {
- /* specify the length of each IP datagram fragment*/
+ /* specify the length of each IP datagram fragment */
skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
__skb_queue_tail(&sk->sk_write_queue, skb);
-
- return 0;
}
- /* There is not enough support do UFO ,
- * so follow normal path
- */
- kfree_skb(skb);
- return err;
+
+ return skb_append_datato_frags(sk, skb, getfrag, from,
+ (length - transhdrlen));
}
/*
@@ -863,9 +855,9 @@ int ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
inet->cork.length += length;
- if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
- (rt->u.dst.dev->features & NETIF_F_UFO)) {
-
+ if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) &&
+ (sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->u.dst.dev->features & NETIF_F_UFO)) {
err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
fragheaderlen, transhdrlen, mtu,
flags);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 4d8d95404f45..e0514e82308e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1186,7 +1186,14 @@ int ip_getsockopt(struct sock *sk, int level,
int compat_ip_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
- int err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+ int err;
+
+ if (optname == MCAST_MSFILTER)
+ return compat_mc_getsockopt(sk, level, optname, optval, optlen,
+ ip_getsockopt);
+
+ err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 0f42d1c1f690..89dee4346f60 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -412,12 +412,12 @@ static struct packet_type rarp_packet_type __initdata = {
.func = ic_rarp_recv,
};
-static inline void ic_rarp_init(void)
+static inline void __init ic_rarp_init(void)
{
dev_add_pack(&rarp_packet_type);
}
-static inline void ic_rarp_cleanup(void)
+static inline void __init ic_rarp_cleanup(void)
{
dev_remove_pack(&rarp_packet_type);
}
@@ -682,7 +682,7 @@ static void __init ic_bootp_init_ext(u8 *e)
/*
* Initialize the DHCP/BOOTP mechanism.
*/
-static inline void ic_bootp_init(void)
+static inline void __init ic_bootp_init(void)
{
int i;
@@ -696,7 +696,7 @@ static inline void ic_bootp_init(void)
/*
* DHCP/BOOTP cleanup.
*/
-static inline void ic_bootp_cleanup(void)
+static inline void __init ic_bootp_cleanup(void)
{
dev_remove_pack(&bootp_packet_type);
}
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index dde28a250d92..4b1c16cbb16b 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -148,7 +148,7 @@ const char * ip_vs_state_name(__u16 proto, int state)
struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
if (pp == NULL || pp->state_name == NULL)
- return "ERR!";
+ return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
return pp->state_name(state);
}
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index a842676e1c69..4bf835e1d86d 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -160,6 +160,7 @@ static void ah_exit(struct ip_vs_protocol *pp)
struct ip_vs_protocol ip_vs_protocol_ah = {
.name = "AH",
.protocol = IPPROTO_AH,
+ .num_states = 1,
.dont_defrag = 1,
.init = ah_init,
.exit = ah_exit,
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index aef0d3ee8e44..db6a6b7b1a0b 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -159,6 +159,7 @@ static void esp_exit(struct ip_vs_protocol *pp)
struct ip_vs_protocol ip_vs_protocol_esp = {
.name = "ESP",
.protocol = IPPROTO_ESP,
+ .num_states = 1,
.dont_defrag = 1,
.init = esp_init,
.exit = esp_exit,
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 620e40ff79a9..b83dc14b0a4d 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -594,6 +594,7 @@ static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
struct ip_vs_protocol ip_vs_protocol_tcp = {
.name = "TCP",
.protocol = IPPROTO_TCP,
+ .num_states = IP_VS_TCP_S_LAST,
.dont_defrag = 0,
.appcnt = ATOMIC_INIT(0),
.init = ip_vs_tcp_init,
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 1caa2908373f..75771cb3cd6f 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -409,6 +409,7 @@ static void udp_exit(struct ip_vs_protocol *pp)
struct ip_vs_protocol ip_vs_protocol_udp = {
.name = "UDP",
.protocol = IPPROTO_UDP,
+ .num_states = IP_VS_UDP_S_LAST,
.dont_defrag = 0,
.init = udp_init,
.exit = udp_exit,
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 69c56663cc9a..eff54efe0351 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -288,11 +288,16 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
char *p;
int i;
+ if (buflen < sizeof(struct ip_vs_sync_mesg)) {
+ IP_VS_ERR_RL("sync message header too short\n");
+ return;
+ }
+
/* Convert size back to host byte order */
m->size = ntohs(m->size);
if (buflen != m->size) {
- IP_VS_ERR("bogus message\n");
+ IP_VS_ERR_RL("bogus sync message size\n");
return;
}
@@ -307,9 +312,48 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
for (i=0; i<m->nr_conns; i++) {
unsigned flags, state;
- s = (struct ip_vs_sync_conn *)p;
+ if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
+ IP_VS_ERR_RL("bogus conn in sync message\n");
+ return;
+ }
+ s = (struct ip_vs_sync_conn *) p;
flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
+ flags &= ~IP_VS_CONN_F_HASHED;
+ if (flags & IP_VS_CONN_F_SEQ_MASK) {
+ opt = (struct ip_vs_sync_conn_options *)&s[1];
+ p += FULL_CONN_SIZE;
+ if (p > buffer+buflen) {
+ IP_VS_ERR_RL("bogus conn options in sync message\n");
+ return;
+ }
+ } else {
+ opt = NULL;
+ p += SIMPLE_CONN_SIZE;
+ }
+
state = ntohs(s->state);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ pp = ip_vs_proto_get(s->protocol);
+ if (!pp) {
+ IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+ s->protocol);
+ continue;
+ }
+ if (state >= pp->num_states) {
+ IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+ pp->name, state);
+ continue;
+ }
+ } else {
+ /* protocol in templates is not used for state/timeout */
+ pp = NULL;
+ if (state > 0) {
+ IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+ state);
+ state = 0;
+ }
+ }
+
if (!(flags & IP_VS_CONN_F_TEMPLATE))
cp = ip_vs_conn_in_get(s->protocol,
s->caddr, s->cport,
@@ -345,14 +389,9 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
IP_VS_ERR("ip_vs_conn_new failed\n");
return;
}
- cp->state = state;
} else if (!cp->dest) {
dest = ip_vs_try_bind_dest(cp);
- if (!dest) {
- /* it is an unbound entry created by
- * synchronization */
- cp->flags = flags | IP_VS_CONN_F_HASHED;
- } else
+ if (dest)
atomic_dec(&dest->refcnt);
} else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
(cp->state != state)) {
@@ -371,23 +410,22 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
}
}
- if (flags & IP_VS_CONN_F_SEQ_MASK) {
- opt = (struct ip_vs_sync_conn_options *)&s[1];
+ if (opt)
memcpy(&cp->in_seq, opt, sizeof(*opt));
- p += FULL_CONN_SIZE;
- } else
- p += SIMPLE_CONN_SIZE;
-
atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
cp->state = state;
- pp = ip_vs_proto_get(s->protocol);
- cp->timeout = pp->timeout_table[cp->state];
+ cp->old_state = cp->state;
+ /*
+ * We can not recover the right timeout for templates
+ * in all cases, we can not find the right fwmark
+ * virtual service. If needed, we can do it for
+ * non-fwmark persistent services.
+ */
+ if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
+ cp->timeout = pp->timeout_table[state];
+ else
+ cp->timeout = (3*60*HZ);
ip_vs_conn_put(cp);
-
- if (p > buffer+buflen) {
- IP_VS_ERR("bogus message\n");
- return;
- }
}
}
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 719be29f7506..26a37cedcf2e 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -296,9 +296,8 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
if (v->data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
- nskb = skb_copy_expand(e->skb, 0,
- diff - skb_tailroom(e->skb),
- GFP_ATOMIC);
+ nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
+ diff, GFP_ATOMIC);
if (!nskb) {
printk(KERN_WARNING "ip_queue: error "
"in mangle, dropping packet\n");
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index cacb9cb27dab..5a955c440364 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -303,7 +303,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
const struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
- NF_CT_TUPLE_U_BLANK(&tuple);
+ memset(&tuple, 0, sizeof(tuple));
tuple.src.u3.ip = inet->rcv_saddr;
tuple.src.u.tcp.port = inet->sport;
tuple.dst.u3.ip = inet->daddr;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ce25a13f3430..5e3685c5c407 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1430,11 +1430,13 @@ static inline unsigned short guess_mtu(unsigned short old_mtu)
}
unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
- unsigned short new_mtu)
+ unsigned short new_mtu,
+ struct net_device *dev)
{
- int i;
+ int i, k;
unsigned short old_mtu = ntohs(iph->tot_len);
struct rtable *rth;
+ int ikeys[2] = { dev->ifindex, 0 };
__be32 skeys[2] = { iph->saddr, 0, };
__be32 daddr = iph->daddr;
unsigned short est_mtu = 0;
@@ -1442,22 +1444,26 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
if (ipv4_config.no_pmtu_disc)
return 0;
- for (i = 0; i < 2; i++) {
- unsigned hash = rt_hash(daddr, skeys[i], 0);
+ for (k = 0; k < 2; k++) {
+ for (i = 0; i < 2; i++) {
+ unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
- rcu_read_lock();
- for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
- rth = rcu_dereference(rth->u.dst.rt_next)) {
- if (rth->fl.fl4_dst == daddr &&
- rth->fl.fl4_src == skeys[i] &&
- rth->rt_dst == daddr &&
- rth->rt_src == iph->saddr &&
- rth->fl.iif == 0 &&
- !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
- net_eq(dev_net(rth->u.dst.dev), net) &&
- rth->rt_genid == atomic_read(&rt_genid)) {
+ rcu_read_lock();
+ for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
+ rth = rcu_dereference(rth->u.dst.rt_next)) {
unsigned short mtu = new_mtu;
+ if (rth->fl.fl4_dst != daddr ||
+ rth->fl.fl4_src != skeys[i] ||
+ rth->rt_dst != daddr ||
+ rth->rt_src != iph->saddr ||
+ rth->fl.oif != ikeys[k] ||
+ rth->fl.iif != 0 ||
+ dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
+ !net_eq(dev_net(rth->u.dst.dev), net) ||
+ rth->rt_genid != atomic_read(&rt_genid))
+ continue;
+
if (new_mtu < 68 || new_mtu >= old_mtu) {
/* BSD 4.2 compatibility hack :-( */
@@ -1483,8 +1489,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
est_mtu = mtu;
}
}
+ rcu_read_unlock();
}
- rcu_read_unlock();
}
return est_mtu ? : new_mtu;
}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 3a6be23d222f..6a250828b767 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -285,14 +285,12 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
if (in_flight >= tp->snd_cwnd)
return 1;
- if (!sk_can_gso(sk))
- return 0;
-
left = tp->snd_cwnd - in_flight;
- if (sysctl_tcp_tso_win_divisor)
- return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd;
- else
- return left <= tcp_max_burst(tp);
+ if (sk_can_gso(sk) &&
+ left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
+ left * tp->mss_cache < sk->sk_gso_max_size)
+ return 1;
+ return left <= tcp_max_burst(tp);
}
EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index be24d6ee34bd..0e1a8c91f78e 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -229,7 +229,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
*/
tcp_reno_cong_avoid(sk, ack, in_flight);
} else {
- u32 rtt, target_cwnd, diff;
+ u32 rtt, diff;
+ u64 target_cwnd;
/* We have enough RTT samples, so, using the Vegas
* algorithm, we determine if we should increase or
@@ -252,8 +253,9 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
* We keep it as a fixed point number with
* V_PARAM_SHIFT bits to the right of the binary point.
*/
- target_cwnd = ((old_wnd * vegas->baseRTT)
- << V_PARAM_SHIFT) / rtt;
+ target_cwnd = ((u64)old_wnd * vegas->baseRTT);
+ target_cwnd <<= V_PARAM_SHIFT;
+ do_div(target_cwnd, rtt);
/* Calculate the difference between the window we had,
* and the window we would like to have. This quantity
@@ -279,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
* utilization.
*/
tp->snd_cwnd = min(tp->snd_cwnd,
- (target_cwnd >>
+ ((u32)target_cwnd >>
V_PARAM_SHIFT)+1);
} else if (tp->snd_cwnd <= tp->snd_ssthresh) {
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index d16689e98516..2bf618a3b00b 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -133,7 +133,8 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
*/
tcp_reno_cong_avoid(sk, ack, in_flight);
} else {
- u32 rtt, target_cwnd;
+ u64 target_cwnd;
+ u32 rtt;
/* We have enough rtt samples, so, using the Veno
* algorithm, we determine the state of the network.
@@ -141,8 +142,9 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
rtt = veno->minrtt;
- target_cwnd = ((tp->snd_cwnd * veno->basertt)
- << V_PARAM_SHIFT) / rtt;
+ target_cwnd = (tp->snd_cwnd * veno->basertt);
+ target_cwnd <<= V_PARAM_SHIFT;
+ do_div(target_cwnd, rtt);
veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;