summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/arp.c7
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_semantics.c5
-rw-r--r--net/ipv4/igmp.c48
-rw-r--r--net/ipv4/ip_fragment.c25
-rw-r--r--net/ipv4/ip_sockglue.c21
-rw-r--r--net/ipv4/ip_tunnel.c82
-rw-r--r--net/ipv4/ipconfig.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c57
-rw-r--r--net/ipv4/netfilter/ip_tables.c51
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c16
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c19
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv4/raw.c15
-rw-r--r--net/ipv4/route.c8
-rw-r--r--net/ipv4/tcp.c9
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_timer.c15
-rw-r--r--net/ipv4/tcp_vegas.c2
-rw-r--r--net/ipv4/udp.c5
23 files changed, 226 insertions, 185 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 416dfa004cfb..93581bba8643 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -186,6 +186,7 @@ config NET_IPGRE_DEMUX
config NET_IP_TUNNEL
tristate
+ select DST_CACHE
default n
config NET_IPGRE
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 711b4dfa17c3..cb5eb649ad5f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey)
static int arp_constructor(struct neighbour *neigh)
{
- __be32 addr = *(__be32 *)neigh->primary_key;
+ __be32 addr;
struct net_device *dev = neigh->dev;
struct in_device *in_dev;
struct neigh_parms *parms;
+ u32 inaddr_any = INADDR_ANY;
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+ memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
+
+ addr = *(__be32 *)neigh->primary_key;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (!in_dev) {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 0212591b0077..63f99e9a821b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1358,7 +1358,7 @@ skip:
static bool inetdev_valid_mtu(unsigned int mtu)
{
- return mtu >= 68;
+ return mtu >= IPV4_MIN_MTU;
}
static void inetdev_send_gratuitous_arp(struct net_device *dev,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0cb240c749bf..c9e68ff48a72 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1252,7 +1252,7 @@ fail:
static void ip_fib_net_exit(struct net *net)
{
- unsigned int i;
+ int i;
rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1260,7 +1260,12 @@ static void ip_fib_net_exit(struct net *net)
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
#endif
- for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+ /* Destroy the tables in reverse order to guarantee that the
+ * local table, ID 255, is destroyed before the main table, ID
+ * 254. This is necessary as the local table may contain
+ * references to data contained in the main table.
+ */
+ for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
struct hlist_head *head = &net->ipv4.fib_table_hash[i];
struct hlist_node *tmp;
struct fib_table *tb;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 313e3c11a15a..44abc52bae13 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -640,6 +640,11 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
fi->fib_nh, cfg))
return 1;
}
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ if (cfg->fc_flow &&
+ cfg->fc_flow != fi->fib_nh->nh_tclassid)
+ return 1;
+#endif
if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
(!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
return 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 3809d523d012..c67efa3e79dd 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -89,6 +89,7 @@
#include <linux/rtnetlink.h>
#include <linux/times.h>
#include <linux/pkt_sched.h>
+#include <linux/byteorder/generic.h>
#include <net/net_namespace.h>
#include <net/arp.h>
@@ -327,6 +328,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
+/* source address selection per RFC 3376 section 4.2.13 */
+static __be32 igmpv3_get_srcaddr(struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return htonl(INADDR_ANY);
+
+ for_ifa(in_dev) {
+ if (fl4->saddr == ifa->ifa_local)
+ return fl4->saddr;
+ } endfor_ifa(in_dev);
+
+ return htonl(INADDR_ANY);
+}
+
static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
{
struct sk_buff *skb;
@@ -374,7 +392,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
pip->frag_off = htons(IP_DF);
pip->ttl = 1;
pip->daddr = fl4.daddr;
- pip->saddr = fl4.saddr;
+
+ rcu_read_lock();
+ pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
+ rcu_read_unlock();
+
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
ip_select_ident(net, skb, NULL);
@@ -410,16 +432,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
- int type, struct igmpv3_grec **ppgr)
+ int type, struct igmpv3_grec **ppgr, unsigned int mtu)
{
struct net_device *dev = pmc->interface->dev;
struct igmpv3_report *pih;
struct igmpv3_grec *pgr;
- if (!skb)
- skb = igmpv3_newpack(dev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = igmpv3_newpack(dev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = (struct igmpv3_grec *)skb_put(skb, sizeof(struct igmpv3_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -441,12 +464,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
struct igmpv3_grec *pgr = NULL;
struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
if (ipv4_is_local_multicast(pmc->multiaddr) && !sysctl_igmp_llm_reports)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV4_MIN_MTU)
+ return skb;
+
isquery = type == IGMPV3_MODE_IS_INCLUDE ||
type == IGMPV3_MODE_IS_EXCLUDE;
truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -467,7 +495,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
}
}
first = 1;
@@ -494,12 +522,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -533,7 +561,7 @@ empty_source:
igmpv3_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e2e162432aa3..7057a1b09b5e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -200,6 +200,7 @@ static void ip_expire(unsigned long arg)
qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
+ rcu_read_lock();
spin_lock(&qp->q.lock);
if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -209,7 +210,7 @@ static void ip_expire(unsigned long arg)
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *head = qp->q.fragments;
+ struct sk_buff *clone, *head = qp->q.fragments;
const struct iphdr *iph;
int err;
@@ -218,32 +219,40 @@ static void ip_expire(unsigned long arg)
if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
goto out;
- rcu_read_lock();
head->dev = dev_get_by_index_rcu(net, qp->iif);
if (!head->dev)
- goto out_rcu_unlock;
+ goto out;
+
/* skb has no dst, perform route lookup again */
iph = ip_hdr(head);
err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
if (err)
- goto out_rcu_unlock;
+ goto out;
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
if (frag_expire_skip_icmp(qp->user) &&
(skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out_rcu_unlock;
+ goto out;
+
+ clone = skb_clone(head, GFP_ATOMIC);
/* Send an ICMP "Fragment Reassembly Timeout" message. */
- icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
+ if (clone) {
+ spin_unlock(&qp->q.lock);
+ icmp_send(clone, ICMP_TIME_EXCEEDED,
+ ICMP_EXC_FRAGTIME, 0);
+ consume_skb(clone);
+ goto out_rcu_unlock;
+ }
}
out:
spin_unlock(&qp->q.lock);
+out_rcu_unlock:
+ rcu_read_unlock();
ipq_put(qp);
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 097a1243c16c..d35509212013 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1221,11 +1221,8 @@ int ip_setsockopt(struct sock *sk, int level,
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
optname != IP_IPSEC_POLICY &&
optname != IP_XFRM_POLICY &&
- !ip_mroute_opt(optname)) {
- lock_sock(sk);
+ !ip_mroute_opt(optname))
err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
- release_sock(sk);
- }
#endif
return err;
}
@@ -1250,12 +1247,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
optname != IP_IPSEC_POLICY &&
optname != IP_XFRM_POLICY &&
- !ip_mroute_opt(optname)) {
- lock_sock(sk);
- err = compat_nf_setsockopt(sk, PF_INET, optname,
- optval, optlen);
- release_sock(sk);
- }
+ !ip_mroute_opt(optname))
+ err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
+ optlen);
#endif
return err;
}
@@ -1533,10 +1527,7 @@ int ip_getsockopt(struct sock *sk, int level,
if (get_user(len, optlen))
return -EFAULT;
- lock_sock(sk);
- err = nf_getsockopt(sk, PF_INET, optname, optval,
- &len);
- release_sock(sk);
+ err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
if (err >= 0)
err = put_user(len, optlen);
return err;
@@ -1568,9 +1559,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- lock_sock(sk);
err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len);
- release_sock(sk);
if (err >= 0)
err = put_user(len, optlen);
return err;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 3310ac75e3f3..80e2d1b0c08c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -69,61 +69,6 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
IP_TNL_HASH_BITS);
}
-static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
- struct dst_entry *dst, __be32 saddr)
-{
- struct dst_entry *old_dst;
-
- dst_clone(dst);
- old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
- dst_release(old_dst);
- idst->saddr = saddr;
-}
-
-static noinline void tunnel_dst_set(struct ip_tunnel *t,
- struct dst_entry *dst, __be32 saddr)
-{
- __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
-}
-
-static void tunnel_dst_reset(struct ip_tunnel *t)
-{
- tunnel_dst_set(t, NULL, 0);
-}
-
-void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
-{
- int i;
-
- for_each_possible_cpu(i)
- __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
-}
-EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
-
-static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
- u32 cookie, __be32 *saddr)
-{
- struct ip_tunnel_dst *idst;
- struct dst_entry *dst;
-
- rcu_read_lock();
- idst = raw_cpu_ptr(t->dst_cache);
- dst = rcu_dereference(idst->dst);
- if (dst && !atomic_inc_not_zero(&dst->__refcnt))
- dst = NULL;
- if (dst) {
- if (!dst->obsolete || dst->ops->check(dst, cookie)) {
- *saddr = idst->saddr;
- } else {
- tunnel_dst_reset(t);
- dst_release(dst);
- dst = NULL;
- }
- }
- rcu_read_unlock();
- return (struct rtable *)dst;
-}
-
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
__be16 flags, __be32 key)
{
@@ -382,11 +327,12 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
- tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
ip_rt_put(rt);
}
if (dev->type != ARPHRD_ETHER)
dev->flags |= IFF_POINTOPOINT;
+
+ dst_cache_reset(&tunnel->dst_cache);
}
if (!tdev && tunnel->parms.link)
@@ -400,8 +346,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
dev->needed_headroom = t_hlen + hlen;
mtu -= (dev->hard_header_len + t_hlen);
- if (mtu < 68)
- mtu = 68;
+ if (mtu < IPV4_MIN_MTU)
+ mtu = IPV4_MIN_MTU;
return mtu;
}
@@ -733,7 +679,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
- rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
+ rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
+ NULL;
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -743,7 +690,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
if (connected)
- tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
+ dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
+ fl4.saddr);
}
if (rt->dst.dev == dev) {
@@ -841,7 +789,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (set_mtu)
dev->mtu = mtu;
}
- ip_tunnel_dst_reset_all(t);
+ dst_cache_reset(&t->dst_cache);
netdev_state_change(dev);
}
@@ -980,7 +928,7 @@ static void ip_tunnel_dev_free(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
gro_cells_destroy(&tunnel->gro_cells);
- free_percpu(tunnel->dst_cache);
+ dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -1174,15 +1122,15 @@ int ip_tunnel_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
- tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
- if (!tunnel->dst_cache) {
+ err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+ if (err) {
free_percpu(dev->tstats);
- return -ENOMEM;
+ return err;
}
err = gro_cells_init(&tunnel->gro_cells, dev);
if (err) {
- free_percpu(tunnel->dst_cache);
+ dst_cache_destroy(&tunnel->dst_cache);
free_percpu(dev->tstats);
return err;
}
@@ -1212,7 +1160,7 @@ void ip_tunnel_uninit(struct net_device *dev)
if (itn->fb_tunnel_dev != dev)
ip_tunnel_del(itn, netdev_priv(dev));
- ip_tunnel_dst_reset_all(tunnel);
+ dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 0bc7412d9e14..9d6b9c4c5f82 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -152,7 +152,11 @@ static char dhcp_client_identifier[253] __initdata;
/* Persistent data: */
+#ifdef IPCONFIG_DYNAMIC
static int ic_proto_used; /* Protocol used, if any */
+#else
+#define ic_proto_used 0
+#endif
static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
static u8 ic_domain[64]; /* DNS (not NIS) domain name */
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 6e3e0e8b1ce3..4cfcc22f7430 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -367,23 +367,12 @@ static inline bool unconditional(const struct arpt_entry *e)
memcmp(&e->arp, &uncond, sizeof(uncond)) == 0;
}
-static bool find_jump_target(const struct xt_table_info *t,
- const struct arpt_entry *target)
-{
- struct arpt_entry *iter;
-
- xt_entry_foreach(iter, t->entries, t->size) {
- if (iter == target)
- return true;
- }
- return false;
-}
-
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
*/
static int mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -472,10 +461,11 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
e = (struct arpt_entry *)
(entry0 + newpos);
- if (!find_jump_target(newinfo, e))
- return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -521,11 +511,13 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
{
struct xt_entry_target *t;
struct xt_target *target;
+ unsigned long pcnt;
int ret;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
t = arpt_get_target(e);
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
@@ -642,6 +634,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
const struct arpt_replace *repl)
{
struct arpt_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -655,6 +648,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
}
duprintf("translate_table: size %u\n", newinfo->size);
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
@@ -665,7 +661,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- break;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(arpt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
@@ -673,12 +671,13 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
}
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
- return ret;
+ goto out_free;
+ ret = -EINVAL;
if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
i, repl->num_entries);
- return -EINVAL;
+ goto out_free;
}
/* Check hooks all assigned */
@@ -689,17 +688,20 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
i, repl->hook_entry[i]);
- return -EINVAL;
+ goto out_free;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
i, repl->underflow[i]);
- return -EINVAL;
+ goto out_free;
}
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
+ }
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -720,6 +722,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void get_counters(const struct xt_table_info *t,
@@ -1336,8 +1341,8 @@ static int translate_compat_table(struct xt_table_info **pinfo,
newinfo->number = compatr->num_entries;
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- newinfo->hook_entry[i] = info->hook_entry[i];
- newinfo->underflow[i] = info->underflow[i];
+ newinfo->hook_entry[i] = compatr->hook_entry[i];
+ newinfo->underflow[i] = compatr->underflow[i];
}
entry1 = newinfo->entries;
pos = entry1;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a399c5419622..a98173d1ea97 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -443,23 +443,12 @@ ipt_do_table(struct sk_buff *skb,
#endif
}
-static bool find_jump_target(const struct xt_table_info *t,
- const struct ipt_entry *target)
-{
- struct ipt_entry *iter;
-
- xt_entry_foreach(iter, t->entries, t->size) {
- if (iter == target)
- return true;
- }
- return false;
-}
-
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -552,10 +541,11 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
e = (struct ipt_entry *)
(entry0 + newpos);
- if (!find_jump_target(newinfo, e))
- return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -663,10 +653,12 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ unsigned long pcnt;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
j = 0;
mtpar.net = net;
@@ -811,6 +803,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ipt_replace *repl)
{
struct ipt_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -824,6 +817,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
duprintf("translate_table: size %u\n", newinfo->size);
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -833,17 +829,20 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- return ret;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(ipt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
+ ret = -EINVAL;
if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
i, repl->num_entries);
- return -EINVAL;
+ goto out_free;
}
/* Check hooks all assigned */
@@ -854,17 +853,20 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
i, repl->hook_entry[i]);
- return -EINVAL;
+ goto out_free;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
i, repl->underflow[i]);
- return -EINVAL;
+ goto out_free;
}
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
+ }
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -885,6 +887,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4a9e6db9df8d..16599bae11dd 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -365,7 +365,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
const struct ipt_entry *e = par->entryinfo;
struct clusterip_config *config;
- int ret;
+ int ret, i;
if (par->nft_compat) {
pr_err("cannot use CLUSTERIP target from nftables compat\n");
@@ -384,8 +384,18 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
pr_info("Please specify destination IP\n");
return -EINVAL;
}
-
- /* FIXME: further sanity checks */
+ if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) {
+ pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes);
+ return -EINVAL;
+ }
+ for (i = 0; i < cipinfo->num_local_nodes; i++) {
+ if (cipinfo->local_nodes[i] - 1 >=
+ sizeof(config->local_nodes) * 8) {
+ pr_info("bad local_nodes[%d] %u\n",
+ i, cipinfo->local_nodes[i]);
+ return -EINVAL;
+ }
+ }
config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
if (!config) {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 6a20195a3a2a..3fe8c951f427 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -259,15 +259,19 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
struct nf_conntrack_tuple tuple;
memset(&tuple, 0, sizeof(tuple));
+
+ lock_sock(sk);
tuple.src.u3.ip = inet->inet_rcv_saddr;
tuple.src.u.tcp.port = inet->inet_sport;
tuple.dst.u3.ip = inet->inet_daddr;
tuple.dst.u.tcp.port = inet->inet_dport;
tuple.src.l3num = PF_INET;
tuple.dst.protonum = sk->sk_protocol;
+ release_sock(sk);
/* We only do TCP and SCTP at the moment: is there a better way? */
- if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
+ if (tuple.dst.protonum != IPPROTO_TCP &&
+ tuple.dst.protonum != IPPROTO_SCTP) {
pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
return -ENOPROTOOPT;
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 2689c9c4f1a0..182eb878633d 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = {
.timeout = 180,
};
-static struct nf_conntrack_helper snmp_helper __read_mostly = {
- .me = THIS_MODULE,
- .help = help,
- .expect_policy = &snmp_exp_policy,
- .name = "snmp",
- .tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
- .tuple.dst.protonum = IPPROTO_UDP,
-};
-
static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.me = THIS_MODULE,
.help = help,
@@ -1288,17 +1278,10 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
static int __init nf_nat_snmp_basic_init(void)
{
- int ret = 0;
-
BUG_ON(nf_nat_snmp_hook != NULL);
RCU_INIT_POINTER(nf_nat_snmp_hook, help);
- ret = nf_conntrack_helper_register(&snmp_trap_helper);
- if (ret < 0) {
- nf_conntrack_helper_unregister(&snmp_helper);
- return ret;
- }
- return ret;
+ return nf_conntrack_helper_register(&snmp_trap_helper);
}
static void __exit nf_nat_snmp_basic_fini(void)
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index c747b2d9eb77..d4acf38b60fd 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -124,6 +124,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
+ nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);
+
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
ip4_dst_hoplimit(skb_dst(nskb)));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8f2cd7d09720..4d3d4291c82f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -500,11 +500,16 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int err;
struct ip_options_data opt_copy;
struct raw_frag_vec rfv;
+ int hdrincl;
err = -EMSGSIZE;
if (len > 0xFFFF)
goto out;
+ /* hdrincl should be READ_ONCE(inet->hdrincl)
+ * but READ_ONCE() doesn't work with bit fields
+ */
+ hdrincl = inet->hdrincl;
/*
* Check the flags.
*/
@@ -579,7 +584,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Linux does not mangle headers on raw sockets,
* so that IP options + IP_HDRINCL is non-sense.
*/
- if (inet->hdrincl)
+ if (hdrincl)
goto done;
if (ipc.opt->opt.srr) {
if (!daddr)
@@ -601,9 +606,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
- (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+ (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0);
if (!saddr && ipc.oif) {
@@ -612,7 +617,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto done;
}
- if (!inet->hdrincl) {
+ if (!hdrincl) {
rfv.msg = msg;
rfv.hlen = 0;
@@ -637,7 +642,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto do_confirm;
back_from_confirm:
- if (inet->hdrincl)
+ if (hdrincl)
err = raw_send_hdrinc(sk, &fl4, msg, len,
&rt, msg->msg_flags);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 52d718e3f077..f0020260b0d4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -126,10 +126,13 @@ static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
-static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
+static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
+
+static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
+
/*
* Interface to generic destination cache.
*/
@@ -2765,7 +2768,8 @@ static struct ctl_table ipv4_route_table[] = {
.data = &ip_rt_min_pmtu,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &ip_min_valid_pmtu,
},
{
.procname = "min_adv_mss",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5597120c8ffd..23d77ff1da59 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2176,6 +2176,9 @@ adjudge_to_death:
tcp_send_active_reset(sk, GFP_ATOMIC);
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
+ } else if (!check_net(sock_net(sk))) {
+ /* Not possible to send reset; just close */
+ tcp_set_state(sk, TCP_CLOSE);
}
}
@@ -2273,6 +2276,12 @@ int tcp_disconnect(struct sock *sk, int flags)
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
+ if (sk->sk_frag.page) {
+ put_page(sk->sk_frag.page);
+ sk->sk_frag.page = NULL;
+ sk->sk_frag.offset = 0;
+ }
+
sk->sk_error_report(sk);
return err;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a5d790c13ef5..61c93a93f228 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -823,7 +823,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+ tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1ec12a4f327e..35f638cfc675 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -46,11 +46,19 @@ static void tcp_write_err(struct sock *sk)
* to prevent DoS attacks. It is called when a retransmission timeout
* or zero probe timeout occurs on orphaned socket.
*
+ * Also close if our net namespace is exiting; in that case there is no
+ * hope of ever communicating again since all netns interfaces are already
+ * down (or about to be down), and we need to release our dst references,
+ * which have been moved to the netns loopback interface, so the namespace
+ * can finish exiting. This condition is only possible if we are a kernel
+ * socket, as those do not hold references to the namespace.
+ *
* Criteria is still not confirmed experimentally and may change.
* We kill the socket, if:
* 1. If number of orphaned sockets exceeds an administratively configured
* limit.
* 2. If we have strong memory pressure.
+ * 3. If our net namespace is exiting.
*/
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
@@ -79,6 +87,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
}
+
+ if (!check_net(sock_net(sk))) {
+ /* Not possible to send reset; just close */
+ tcp_done(sk);
+ return 1;
+ }
+
return 0;
}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 13951c4087d4..b9fac0522be6 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
- return min(tp->snd_ssthresh, tp->snd_cwnd-1);
+ return min(tp->snd_ssthresh, tp->snd_cwnd);
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 301e60829c7e..a98ae890adb9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1744,6 +1744,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
err = udplite_checksum_init(skb, uh);
if (err)
return err;
+
+ if (UDP_SKB_CB(skb)->partial_cov) {
+ skb->csum = inet_compute_pseudo(skb, proto);
+ return 0;
+ }
}
return skb_checksum_init_zero_check(skb, proto, uh->check,