summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c21
-rw-r--r--net/ipv6/inet6_hashtables.c3
-rw-r--r--net/ipv6/ip6_fib.c7
-rw-r--r--net/ipv6/ip6_flowlabel.c29
-rw-r--r--net/ipv6/ip6_input.c10
-rw-r--r--net/ipv6/ip6_output.c4
-rw-r--r--net/ipv6/ip6_tunnel.c16
-rw-r--r--net/ipv6/ip6_vti.c33
-rw-r--r--net/ipv6/ip6mr.c11
-rw-r--r--net/ipv6/ipv6_sockglue.c10
-rw-r--r--net/ipv6/mcast.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c281
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c3
-rw-r--r--net/ipv6/output_core.c30
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c27
-rw-r--r--net/ipv6/reassembly.c363
-rw-r--r--net/ipv6/route.c1
-rw-r--r--net/ipv6/sit.c7
-rw-r--r--net/ipv6/tcp_ipv6.c8
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/ipv6/xfrm6_tunnel.c4
22 files changed, 344 insertions, 535 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ce7f9195151..a4c00242a90b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3189,6 +3189,10 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_6LOWPAN) &&
(dev->type != ARPHRD_NONE)) {
/* Alas, we support only Ethernet autoconfiguration. */
+ idev = __in6_dev_get(dev);
+ if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP &&
+ dev->flags & IFF_MULTICAST)
+ ipv6_mc_up(idev);
return;
}
@@ -5443,13 +5447,20 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
switch (event) {
case RTM_NEWADDR:
/*
- * If the address was optimistic
- * we inserted the route at the start of
- * our DAD process, so we don't need
- * to do it again
+ * If the address was optimistic we inserted the route at the
+ * start of our DAD process, so we don't need to do it again.
+ * If the device was taken down in the middle of the DAD
+ * cycle there is a race where we could get here without a
+ * host route, so nothing to insert. That will be fixed when
+ * the device is brought up.
*/
- if (!rcu_access_pointer(ifp->rt->rt6i_node))
+ if (ifp->rt && !rcu_access_pointer(ifp->rt->rt6i_node)) {
ip6_ins_rt(ifp->rt);
+ } else if (!ifp->rt && (ifp->idev->dev->flags & IFF_UP)) {
+ pr_warn("BUG: Address %pI6c on device %s is missing its host route.\n",
+ &ifp->addr, ifp->idev->dev->name);
+ }
+
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
if (!ipv6_addr_any(&ifp->peer_addr))
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 02761c9fe43e..d47cab6d7c6d 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -133,9 +133,10 @@ struct sock *inet6_lookup_listener(struct net *net,
int score, hiscore = 0, matches = 0, reuseport = 0;
bool exact_dif = inet6_exact_dif_match(net, skb);
struct sock *sk, *result = NULL;
+ struct hlist_nulls_node *node;
u32 phash = 0;
- sk_for_each(sk, &ilb->head) {
+ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
if (score > hiscore) {
reuseport = sk->sk_reuseport;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5da864997495..85c7e250c7a8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -784,8 +784,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
found++;
break;
}
- if (rt_can_ecmp)
- fallback_ins = fallback_ins ?: ins;
+ fallback_ins = fallback_ins ?: ins;
goto next_iter;
}
@@ -825,7 +824,9 @@ next_iter:
}
if (fallback_ins && !found) {
- /* No ECMP-able route found, replace first non-ECMP one */
+ /* No matching route with same ecmp-able-ness found, replace
+ * first matching route
+ */
ins = fallback_ins;
iter = *ins;
found++;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b82e439804d1..6a6011160f18 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
return fl;
}
+static void fl_free_rcu(struct rcu_head *head)
+{
+ struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
+
+ if (fl->share == IPV6_FL_S_PROCESS)
+ put_pid(fl->owner.pid);
+ kfree(fl->opt);
+ kfree(fl);
+}
+
static void fl_free(struct ip6_flowlabel *fl)
{
- if (fl) {
- if (fl->share == IPV6_FL_S_PROCESS)
- put_pid(fl->owner.pid);
- kfree(fl->opt);
- kfree_rcu(fl, rcu);
- }
+ if (fl)
+ call_rcu(&fl->rcu, fl_free_rcu);
}
static void fl_release(struct ip6_flowlabel *fl)
@@ -248,9 +254,9 @@ struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label)
rcu_read_lock_bh();
for_each_sk_fl_rcu(np, sfl) {
struct ip6_flowlabel *fl = sfl->fl;
- if (fl->label == label) {
+
+ if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
fl->lastuse = jiffies;
- atomic_inc(&fl->users);
rcu_read_unlock_bh();
return fl;
}
@@ -617,7 +623,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
goto done;
}
fl1 = sfl->fl;
- atomic_inc(&fl1->users);
+ if (!atomic_inc_not_zero(&fl1->users))
+ fl1 = NULL;
break;
}
}
@@ -634,9 +641,9 @@ recheck:
if (fl1->share == IPV6_FL_S_EXCL ||
fl1->share != fl->share ||
((fl1->share == IPV6_FL_S_PROCESS) &&
- (fl1->owner.pid == fl->owner.pid)) ||
+ (fl1->owner.pid != fl->owner.pid)) ||
((fl1->share == IPV6_FL_S_USER) &&
- uid_eq(fl1->owner.uid, fl->owner.uid)))
+ !uid_eq(fl1->owner.uid, fl->owner.uid)))
goto release;
err = -ENOMEM;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index aacfb4bce153..e726a61ae6dc 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -168,6 +168,16 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (ipv6_addr_is_multicast(&hdr->saddr))
goto err;
+ /* While RFC4291 is not explicit about v4mapped addresses
+ * in IPv6 headers, it seems clear linux dual-stack
+ * model can not deal properly with these.
+ * Security models could be fooled by ::ffff:127.0.0.1 for example.
+ *
+ * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02
+ */
+ if (ipv6_addr_v4mapped(&hdr->saddr))
+ goto err;
+
skb->transport_header = skb->network_header + sizeof(*hdr);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b723987761be..11407dd6bc7c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -592,7 +592,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
inet6_sk(skb->sk) : NULL;
struct ipv6hdr *tmp_hdr;
struct frag_hdr *fh;
- unsigned int mtu, hlen, left, len;
+ unsigned int mtu, hlen, left, len, nexthdr_offset;
int hroom, troom;
__be32 frag_id;
int ptr, offset = 0, err = 0;
@@ -603,6 +603,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
goto fail;
hlen = err;
nexthdr = *prevhdr;
+ nexthdr_offset = prevhdr - skb_network_header(skb);
mtu = ip6_skb_dst_mtu(skb);
@@ -637,6 +638,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
(err = skb_checksum_help(skb)))
goto fail;
+ prevhdr = skb_network_header(skb) + nexthdr_offset;
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
int first_len = skb_pagelen(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index f89516d04150..ed795d50a8d2 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -634,7 +634,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
IPPROTO_IPIP,
RT_TOS(eiph->tos), 0);
if (IS_ERR(rt) ||
- rt->dst.dev->type != ARPHRD_TUNNEL) {
+ rt->dst.dev->type != ARPHRD_TUNNEL6) {
if (!IS_ERR(rt))
ip_rt_put(rt);
goto out;
@@ -644,7 +644,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ip_rt_put(rt);
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
skb2->dev) ||
- skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
+ skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
goto out;
}
@@ -1275,11 +1275,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_mark = skb->mark;
}
+ dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
+
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
- dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
-
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1362,11 +1362,11 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_mark = skb->mark;
}
+ dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
+
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
- dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
-
skb_set_inner_ipproto(skb, IPPROTO_IPV6);
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1861,10 +1861,8 @@ static int ip6_tnl_dev_init(struct net_device *dev)
if (err)
return err;
ip6_tnl_link_config(t);
- if (t->parms.collect_md) {
- dev->features |= NETIF_F_NETNS_LOCAL;
+ if (t->parms.collect_md)
netif_keep_dst(dev);
- }
return 0;
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c2b2ee71fc6c..b9f5155a77ef 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -315,7 +315,7 @@ static int vti6_rcv(struct sk_buff *skb)
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
rcu_read_unlock();
- return 0;
+ goto discard;
}
ipv6h = ipv6_hdr(skb);
@@ -453,8 +453,35 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
int err = -1;
int mtu;
- if (!dst)
- goto tx_err_link_failure;
+ if (!dst) {
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ struct rtable *rt;
+
+ fl->u.ip4.flowi4_oif = dev->ifindex;
+ fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+ rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4);
+ if (IS_ERR(rt))
+ goto tx_err_link_failure;
+ dst = &rt->dst;
+ skb_dst_set(skb, dst);
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ fl->u.ip6.flowi6_oif = dev->ifindex;
+ fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
+ dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6);
+ if (dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ goto tx_err_link_failure;
+ }
+ skb_dst_set(skb, dst);
+ break;
+ default:
+ goto tx_err_link_failure;
+ }
+ }
dst_hold(dst);
dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 41f67629ae59..f38b22f54c09 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1668,6 +1668,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
struct net *net = sock_net(sk);
struct mr6_table *mrt;
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return -EOPNOTSUPP;
+
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
return -ENOENT;
@@ -1679,9 +1683,6 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
switch (optname) {
case MRT6_INIT:
- if (sk->sk_type != SOCK_RAW ||
- inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
- return -EOPNOTSUPP;
if (optlen < sizeof(int))
return -EINVAL;
@@ -1818,6 +1819,10 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
struct net *net = sock_net(sk);
struct mr6_table *mrt;
+ if (sk->sk_type != SOCK_RAW ||
+ inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+ return -EOPNOTSUPP;
+
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
return -ENOENT;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 81fd35ed8732..1080770b5eaf 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -184,9 +184,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = -EBUSY;
break;
}
- } else if (sk->sk_protocol != IPPROTO_TCP)
+ } else if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_prot != &tcpv6_prot) {
+ retv = -EBUSY;
+ break;
+ }
break;
-
+ } else {
+ break;
+ }
if (sk->sk_state != TCP_ESTABLISHED) {
retv = -ENOTCONN;
break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 40262abb15db..e065d48b31b9 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -772,12 +772,13 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
im->idev = pmc->idev;
im->mca_crcount = idev->mc_qrv;
if (im->mca_sfmode == MCAST_INCLUDE) {
- im->mca_tomb = pmc->mca_tomb;
- im->mca_sources = pmc->mca_sources;
+ swap(im->mca_tomb, pmc->mca_tomb);
+ swap(im->mca_sources, pmc->mca_sources);
for (psf = im->mca_sources; psf; psf = psf->sf_next)
psf->sf_crcount = im->mca_crcount;
}
in6_dev_put(pmc->idev);
+ ip6_mc_clear_src(pmc);
kfree(pmc);
}
spin_unlock_bh(&im->mca_lock);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index e46185377981..0b53d1907e4a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -33,9 +33,8 @@
#include <net/sock.h>
#include <net/snmp.h>
-#include <net/inet_frag.h>
+#include <net/ipv6_frag.h>
-#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
@@ -52,14 +51,6 @@
static const char nf_frags_cache_name[] = "nf-frags";
-struct nf_ct_frag6_skb_cb
-{
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
-
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
@@ -145,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
}
#endif
+static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev);
+
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -158,7 +152,7 @@ static void nf_ct_frag6_expire(unsigned long data)
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
- ip6_expire_frag_queue(net, fq);
+ ip6frag_expire_frag_queue(net, fq);
}
/* Creation primitives. */
@@ -185,9 +179,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
const struct frag_hdr *fhdr, int nhoff)
{
- struct sk_buff *prev, *next;
unsigned int payload_len;
- int offset, end;
+ struct net_device *dev;
+ struct sk_buff *prev;
+ int offset, end, err;
u8 ecn;
if (fq->q.flags & INET_FRAG_COMPLETE) {
@@ -262,55 +257,25 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
goto err;
}
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
+ /* Note : skb->rbnode and skb->dev share the same location. */
+ dev = skb->dev;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+
prev = fq->q.fragments_tail;
- if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (NFCT_FRAG6_CB(next)->offset >= offset)
- break; /* bingo! */
- prev = next;
+ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+ if (err) {
+ if (err == IPFRAG_DUP) {
+ /* No error for duplicates, pretend they got queued. */
+ kfree_skb(skb);
+ return -EINPROGRESS;
+ }
+ goto insert_error;
}
-found:
- /* RFC5722, Section 4:
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments, including those not yet received) MUST be silently
- * discarded.
- */
+ if (dev)
+ fq->iif = dev->ifindex;
- /* Check for overlap with preceding fragment. */
- if (prev &&
- (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
- goto discard_fq;
-
- /* Look for overlap with succeeding segment. */
- if (next && NFCT_FRAG6_CB(next)->offset < end)
- goto discard_fq;
-
- NFCT_FRAG6_CB(skb)->offset = offset;
-
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- fq->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- fq->q.fragments = skb;
-
- if (skb->dev) {
- fq->iif = skb->dev->ifindex;
- skb->dev = NULL;
- }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
@@ -326,11 +291,27 @@ found:
fq->q.flags |= INET_FRAG_FIRST_IN;
}
- return 0;
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len) {
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+ err = nf_ct_frag6_reasm(fq, skb, prev, dev);
+ skb->_skb_refdst = orefdst;
-discard_fq:
+ /* After queue has assumed skb ownership, only 0 or
+ * -EINPROGRESS must be returned.
+ */
+ return err ? -EINPROGRESS : 0;
+ }
+
+ skb_dst_drop(skb);
+ return -EINPROGRESS;
+
+insert_error:
inet_frag_kill(&fq->q);
err:
+ skb_dst_drop(skb);
return -EINVAL;
}
@@ -340,141 +321,67 @@ err:
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
- *
- * returns true if *prev skb has been transformed into the reassembled
- * skb, false otherwise.
*/
-static bool
-nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
+static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev)
{
- struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len;
+ void *reasm_data;
+ int payload_len;
u8 ecn;
inet_frag_kill(&fq->q);
- WARN_ON(head == NULL);
- WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
-
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
- return false;
+ goto err;
- /* Unfragmented part is taken from the first segment. */
- payload_len = ((head->data - skb_network_header(head)) -
+ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+ if (!reasm_data)
+ goto err;
+
+ payload_len = ((skb->data - skb_network_header(skb)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
payload_len);
- return false;
- }
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- return false;
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (clone == NULL)
- return false;
-
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
-
- add_frag_mem_limit(fq->q.net, clone->truesize);
- }
-
- /* morph head into last received skb: prev.
- *
- * This allows callers of ipv6 conntrack defrag to continue
- * to use the last skb(frag) passed into the reasm engine.
- * The last skb frag 'silently' turns into the full reassembled skb.
- *
- * Since prev is also part of q->fragments we have to clone it first.
- */
- if (head != prev) {
- struct sk_buff *iter;
-
- fp = skb_clone(prev, GFP_ATOMIC);
- if (!fp)
- return false;
-
- fp->next = prev->next;
-
- iter = head;
- while (iter) {
- if (iter->next == prev) {
- iter->next = fp;
- break;
- }
- iter = iter->next;
- }
-
- skb_morph(prev, head);
- prev->next = head->next;
- consume_skb(head);
- head = prev;
+ goto err;
}
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
- skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
- memmove(head->head + sizeof(struct frag_hdr), head->head,
- (head->data - head->head) - sizeof(struct frag_hdr));
- head->mac_header += sizeof(struct frag_hdr);
- head->network_header += sizeof(struct frag_hdr);
-
- skb_shinfo(head)->frag_list = head->next;
- skb_reset_transport_header(head);
- skb_push(head, head->data - skb_network_header(head));
-
- for (fp = head->next; fp; fp = fp->next) {
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
- fp->sk = NULL;
- }
- sub_frag_mem_limit(fq->q.net, head->truesize);
+ skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0];
+ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+ (skb->data - skb->head) - sizeof(struct frag_hdr));
+ skb->mac_header += sizeof(struct frag_hdr);
+ skb->network_header += sizeof(struct frag_hdr);
+
+ skb_reset_transport_header(skb);
- head->ignore_df = 1;
- head->next = NULL;
- head->dev = dev;
- head->tstamp = fq->q.stamp;
- ipv6_hdr(head)->payload_len = htons(payload_len);
- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
- IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+
+ skb->ignore_df = 1;
+ skb->dev = dev;
+ ipv6_hdr(skb)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+ IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
- if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(skb_network_header(head),
- skb_network_header_len(head),
- head->csum);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_partial(skb_network_header(skb),
+ skb_network_header_len(skb),
+ skb->csum);
fq->q.fragments = NULL;
fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
+ fq->q.last_run_head = NULL;
+
+ return 0;
- return true;
+err:
+ inet_frag_kill(&fq->q);
+ return -EINVAL;
}
/*
@@ -543,7 +450,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
u16 savethdr = skb->transport_header;
- struct net_device *dev = skb->dev;
int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
struct frag_queue *fq;
@@ -566,10 +472,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
- fhdr->frag_off & htons(IP6_MF))
- return -EINVAL;
-
skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, hdr,
skb->dev ? skb->dev->ifindex : 0);
@@ -581,24 +483,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
spin_lock_bh(&fq->q.lock);
ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
- if (ret < 0) {
- if (ret == -EPROTO) {
- skb->transport_header = savethdr;
- ret = 0;
- }
- goto out_unlock;
- }
-
- /* after queue has assumed skb ownership, only 0 or -EINPROGRESS
- * must be returned.
- */
- ret = -EINPROGRESS;
- if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len &&
- nf_ct_frag6_reasm(fq, skb, dev))
+ if (ret == -EPROTO) {
+ skb->transport_header = savethdr;
ret = 0;
+ }
-out_unlock:
spin_unlock_bh(&fq->q.lock);
inet_frag_put(&fq->q);
return ret;
@@ -634,16 +523,24 @@ static struct pernet_operations nf_ct_net_ops = {
.exit = nf_ct_net_exit,
};
+static const struct rhashtable_params nfct_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = ip6frag_key_hashfn,
+ .obj_hashfn = ip6frag_obj_hashfn,
+ .obj_cmpfn = ip6frag_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
int nf_ct_frag6_init(void)
{
int ret = 0;
- nf_frags.constructor = ip6_frag_init;
+ nf_frags.constructor = ip6frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.frags_cache_name = nf_frags_cache_name;
- nf_frags.rhash_params = ip6_rhash_params;
+ nf_frags.rhash_params = nfct_rhash_params;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index f06b0471f39f..c4070e9c4260 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -14,8 +14,7 @@
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
-#include <net/ipv6.h>
-#include <net/inet_frag.h>
+#include <net/ipv6_frag.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index a338bbc33cf3..6a6d01cb1ace 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -10,15 +10,25 @@
#include <net/secure_seq.h>
#include <linux/netfilter.h>
-static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
+static u32 __ipv6_select_ident(struct net *net,
const struct in6_addr *dst,
const struct in6_addr *src)
{
+ const struct {
+ struct in6_addr dst;
+ struct in6_addr src;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .dst = *dst,
+ .src = *src,
+ };
u32 hash, id;
- hash = __ipv6_addr_jhash(dst, hashrnd);
- hash = __ipv6_addr_jhash(src, hash);
- hash ^= net_hash_mix(net);
+ /* Note the following code is not safe, but this is okay. */
+ if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
+ get_random_bytes(&net->ipv4.ip_id_key,
+ sizeof(net->ipv4.ip_id_key));
+
+ hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
/* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
* set the hight order instead thus minimizing possible future
@@ -41,7 +51,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
*/
void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
{
- static u32 ip6_proxy_idents_hashrnd __read_mostly;
struct in6_addr buf[2];
struct in6_addr *addrs;
u32 id;
@@ -53,11 +62,7 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
if (!addrs)
return;
- net_get_random_once(&ip6_proxy_idents_hashrnd,
- sizeof(ip6_proxy_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
- &addrs[1], &addrs[0]);
+ id = __ipv6_select_ident(net, &addrs[1], &addrs[0]);
skb_shinfo(skb)->ip6_frag_id = htonl(id);
}
EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
@@ -66,12 +71,9 @@ __be32 ipv6_select_ident(struct net *net,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
- static u32 ip6_idents_hashrnd __read_mostly;
u32 id;
- net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
-
- id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr);
+ id = __ipv6_select_ident(net, daddr, saddr);
return htonl(id);
}
EXPORT_SYMBOL(ipv6_select_ident);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 982868193dbb..e209ae19fe78 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -239,7 +239,7 @@ static int __net_init ping_v6_proc_init_net(struct net *net)
return ping_proc_register(net, &ping_v6_seq_afinfo);
}
-static void __net_init ping_v6_proc_exit_net(struct net *net)
+static void __net_exit ping_v6_proc_exit_net(struct net *net)
{
return ping_proc_unregister(net, &ping_v6_seq_afinfo);
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a4f979ff31b9..301978df650e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -283,7 +283,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* Binding to link-local address requires an interface */
if (!sk->sk_bound_dev_if)
goto out_unlock;
+ }
+ if (sk->sk_bound_dev_if) {
err = -ENODEV;
dev = dev_get_by_index_rcu(sock_net(sk),
sk->sk_bound_dev_if);
@@ -772,6 +774,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct sockcm_cookie sockc;
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
+ int hdrincl;
u16 proto;
int err;
@@ -785,6 +788,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
+ /* hdrincl should be READ_ONCE(inet->hdrincl)
+ * but READ_ONCE() doesn't work with bit fields.
+ * Doing this indirectly yields the same result.
+ */
+ hdrincl = inet->hdrincl;
+ hdrincl = READ_ONCE(hdrincl);
+
/*
* Get and verify the address.
*/
@@ -878,11 +888,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = ipv6_fixup_options(&opt_space, opt);
fl6.flowi6_proto = proto;
- rfv.msg = msg;
- rfv.hlen = 0;
- err = rawv6_probe_proto_opt(&rfv, &fl6);
- if (err)
- goto out;
+
+ if (!hdrincl) {
+ rfv.msg = msg;
+ rfv.hlen = 0;
+ err = rawv6_probe_proto_opt(&rfv, &fl6);
+ if (err)
+ goto out;
+ }
if (!ipv6_addr_any(daddr))
fl6.daddr = *daddr;
@@ -899,7 +912,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_oif = np->ucast_oif;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- if (inet->hdrincl)
+ if (hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
if (ipc6.tclass < 0)
@@ -922,7 +935,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto do_confirm;
back_from_confirm:
- if (inet->hdrincl)
+ if (hdrincl)
err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags);
else {
ipc6.opt = opt;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 74ffbcb306a6..3f488555999e 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -57,18 +57,11 @@
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
-#include <net/inet_frag.h>
+#include <net/ipv6_frag.h>
#include <net/inet_ecn.h>
static const char ip6_frag_cache_name[] = "ip6-frags";
-struct ip6frag_skb_cb {
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb))
-
static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -76,63 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
static struct inet_frags ip6_frags;
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
- struct net_device *dev);
-
-void ip6_frag_init(struct inet_frag_queue *q, const void *a)
-{
- struct frag_queue *fq = container_of(q, struct frag_queue, q);
- const struct frag_v6_compare_key *key = a;
-
- q->key.v6 = *key;
- fq->ecn = 0;
-}
-EXPORT_SYMBOL(ip6_frag_init);
-
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
-{
- struct net_device *dev = NULL;
- struct sk_buff *head;
-
- rcu_read_lock();
- spin_lock(&fq->q.lock);
-
- if (fq->q.flags & INET_FRAG_COMPLETE)
- goto out;
-
- inet_frag_kill(&fq->q);
-
- dev = dev_get_by_index_rcu(net, fq->iif);
- if (!dev)
- goto out;
-
- __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
- __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
-
- /* Don't send error if the first segment did not arrive. */
- head = fq->q.fragments;
- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
- goto out;
-
- /* But use as source device on which LAST ARRIVED
- * segment was received. And do not use fq->dev
- * pointer directly, device might already disappeared.
- */
- head->dev = dev;
- skb_get(head);
- spin_unlock(&fq->q.lock);
-
- icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
- kfree_skb(head);
- goto out_rcu_unlock;
-
-out:
- spin_unlock(&fq->q.lock);
-out_rcu_unlock:
- rcu_read_unlock();
- inet_frag_put(&fq->q);
-}
-EXPORT_SYMBOL(ip6_expire_frag_queue);
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev);
static void ip6_frag_expire(unsigned long data)
{
@@ -142,7 +80,7 @@ static void ip6_frag_expire(unsigned long data)
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
- ip6_expire_frag_queue(net, fq);
+ ip6frag_expire_frag_queue(net, fq);
}
static struct frag_queue *
@@ -169,27 +107,29 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
}
static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
- struct frag_hdr *fhdr, int nhoff)
+ struct frag_hdr *fhdr, int nhoff,
+ u32 *prob_offset)
{
- struct sk_buff *prev, *next;
- struct net_device *dev;
- int offset, end;
struct net *net = dev_net(skb_dst(skb)->dev);
+ int offset, end, fragsize;
+ struct sk_buff *prev_tail;
+ struct net_device *dev;
+ int err = -ENOENT;
u8 ecn;
if (fq->q.flags & INET_FRAG_COMPLETE)
goto err;
+ err = -EINVAL;
offset = ntohs(fhdr->frag_off) & ~0x7;
end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
- ((u8 *)&fhdr->frag_off -
- skb_network_header(skb)));
+ *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
+ /* note that if prob_offset is set, the skb is freed elsewhere,
+ * we do not free it here.
+ */
return -1;
}
@@ -209,7 +149,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
*/
if (end < fq->q.len ||
((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
- goto err;
+ goto discard_fq;
fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
@@ -220,84 +160,51 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
/* RFC2460 says always send parameter problem in
* this case. -DaveM
*/
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
- offsetof(struct ipv6hdr, payload_len));
+ *prob_offset = offsetof(struct ipv6hdr, payload_len);
return -1;
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
if (fq->q.flags & INET_FRAG_LAST_IN)
- goto err;
+ goto discard_fq;
fq->q.len = end;
}
}
if (end == offset)
- goto err;
+ goto discard_fq;
+ err = -ENOMEM;
/* Point into the IP datagram 'data' part. */
if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
- goto err;
-
- if (pskb_trim_rcsum(skb, end - offset))
- goto err;
-
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = fq->q.fragments_tail;
- if (!prev || FRAG6_CB(prev)->offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (FRAG6_CB(next)->offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* RFC5722, Section 4, amended by Errata ID : 3089
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments) MUST be silently discarded.
- */
-
- /* Check for overlap with preceding fragment. */
- if (prev &&
- (FRAG6_CB(prev)->offset + prev->len) > offset)
goto discard_fq;
- /* Look for overlap with succeeding segment. */
- if (next && FRAG6_CB(next)->offset < end)
+ err = pskb_trim_rcsum(skb, end - offset);
+ if (err)
goto discard_fq;
- FRAG6_CB(skb)->offset = offset;
+ /* Note : skb->rbnode and skb->dev share the same location. */
+ dev = skb->dev;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- fq->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- fq->q.fragments = skb;
+ prev_tail = fq->q.fragments_tail;
+ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+ if (err)
+ goto insert_error;
- dev = skb->dev;
- if (dev) {
+ if (dev)
fq->iif = dev->ifindex;
- skb->dev = NULL;
- }
+
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.net, skb->truesize);
+ fragsize = -skb_network_offset(skb) + skb->len;
+ if (fragsize > fq->q.max_size)
+ fq->q.max_size = fragsize;
+
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
*/
@@ -308,44 +215,48 @@ found:
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
- int res;
unsigned long orefdst = skb->_skb_refdst;
skb->_skb_refdst = 0UL;
- res = ip6_frag_reasm(fq, prev, dev);
+ err = ip6_frag_reasm(fq, skb, prev_tail, dev);
skb->_skb_refdst = orefdst;
- return res;
+ return err;
}
skb_dst_drop(skb);
- return -1;
+ return -EINPROGRESS;
+insert_error:
+ if (err == IPFRAG_DUP) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ err = -EINVAL;
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASM_OVERLAPS);
discard_fq:
inet_frag_kill(&fq->q);
-err:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
+err:
kfree_skb(skb);
- return -1;
+ return err;
}
/*
* Check if this packet is complete.
- * Returns NULL on failure by any reason, and pointer
- * to current nexthdr field in reassembled frame.
*
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
*/
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
- struct net_device *dev)
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev)
{
struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
- struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len;
unsigned int nhoff;
- int sum_truesize;
+ void *reasm_data;
+ int payload_len;
u8 ecn;
inet_frag_kill(&fq->q);
@@ -354,113 +265,40 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
if (unlikely(ecn == 0xff))
goto out_fail;
- /* Make the one we just received the head. */
- if (prev) {
- head = prev->next;
- fp = skb_clone(head, GFP_ATOMIC);
-
- if (!fp)
- goto out_oom;
-
- fp->next = head->next;
- if (!fp->next)
- fq->q.fragments_tail = fp;
- prev->next = fp;
-
- skb_morph(head, fq->q.fragments);
- head->next = fq->q.fragments->next;
-
- consume_skb(fq->q.fragments);
- fq->q.fragments = head;
- }
-
- WARN_ON(head == NULL);
- WARN_ON(FRAG6_CB(head)->offset != 0);
+ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+ if (!reasm_data)
+ goto out_oom;
- /* Unfragmented part is taken from the first segment. */
- payload_len = ((head->data - skb_network_header(head)) -
+ payload_len = ((skb->data - skb_network_header(skb)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- goto out_oom;
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (!clone)
- goto out_oom;
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(fq->q.net, clone->truesize);
- }
-
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
nhoff = fq->nhoffset;
- skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
- memmove(head->head + sizeof(struct frag_hdr), head->head,
- (head->data - head->head) - sizeof(struct frag_hdr));
- if (skb_mac_header_was_set(head))
- head->mac_header += sizeof(struct frag_hdr);
- head->network_header += sizeof(struct frag_hdr);
-
- skb_reset_transport_header(head);
- skb_push(head, head->data - skb_network_header(head));
-
- sum_truesize = head->truesize;
- for (fp = head->next; fp;) {
- bool headstolen;
- int delta;
- struct sk_buff *next = fp->next;
-
- sum_truesize += fp->truesize;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
-
- if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
- kfree_skb_partial(fp, headstolen);
- } else {
- if (!skb_shinfo(head)->frag_list)
- skb_shinfo(head)->frag_list = fp;
- head->data_len += fp->len;
- head->len += fp->len;
- head->truesize += fp->truesize;
- }
- fp = next;
- }
- sub_frag_mem_limit(fq->q.net, sum_truesize);
+ skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
+ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+ (skb->data - skb->head) - sizeof(struct frag_hdr));
+ if (skb_mac_header_was_set(skb))
+ skb->mac_header += sizeof(struct frag_hdr);
+ skb->network_header += sizeof(struct frag_hdr);
+
+ skb_reset_transport_header(skb);
- head->next = NULL;
- head->dev = dev;
- head->tstamp = fq->q.stamp;
- ipv6_hdr(head)->payload_len = htons(payload_len);
- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
- IP6CB(head)->nhoff = nhoff;
- IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+
+ skb->dev = dev;
+ ipv6_hdr(skb)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+ IP6CB(skb)->nhoff = nhoff;
+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+ IP6CB(skb)->frag_max_size = fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
- skb_postpush_rcsum(head, skb_network_header(head),
- skb_network_header_len(head));
+ skb_postpush_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
rcu_read_lock();
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
@@ -468,6 +306,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
fq->q.fragments = NULL;
fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
+ fq->q.last_run_head = NULL;
return 1;
out_oversize:
@@ -479,6 +318,7 @@ out_fail:
rcu_read_lock();
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
rcu_read_unlock();
+ inet_frag_kill(&fq->q);
return -1;
}
@@ -517,22 +357,26 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
- fhdr->frag_off & htons(IP6_MF))
- goto fail_hdr;
-
iif = skb->dev ? skb->dev->ifindex : 0;
fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
+ u32 prob_offset = 0;
int ret;
spin_lock(&fq->q.lock);
fq->iif = iif;
- ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+ ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
+ &prob_offset);
spin_unlock(&fq->q.lock);
inet_frag_put(&fq->q);
+ if (prob_offset) {
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
+ /* icmpv6_param_prob() calls kfree_skb(skb) */
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
+ }
return ret;
}
@@ -700,42 +544,19 @@ static struct pernet_operations ip6_frags_ops = {
.exit = ipv6_frags_exit_net,
};
-static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
-{
- return jhash2(data,
- sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
-}
-
-static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
-{
- const struct inet_frag_queue *fq = data;
-
- return jhash2((const u32 *)&fq->key.v6,
- sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
-}
-
-static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
-{
- const struct frag_v6_compare_key *key = arg->key;
- const struct inet_frag_queue *fq = ptr;
-
- return !!memcmp(&fq->key, key, sizeof(*key));
-}
-
-const struct rhashtable_params ip6_rhash_params = {
+static const struct rhashtable_params ip6_rhash_params = {
.head_offset = offsetof(struct inet_frag_queue, node),
- .hashfn = ip6_key_hashfn,
- .obj_hashfn = ip6_obj_hashfn,
- .obj_cmpfn = ip6_obj_cmpfn,
+ .hashfn = ip6frag_key_hashfn,
+ .obj_hashfn = ip6frag_obj_hashfn,
+ .obj_cmpfn = ip6frag_obj_cmpfn,
.automatic_shrinking = true,
};
-EXPORT_SYMBOL(ip6_rhash_params);
int __init ipv6_frag_init(void)
{
int ret;
- ip6_frags.constructor = ip6_frag_init;
+ ip6_frags.constructor = ip6frag_init;
ip6_frags.destructor = NULL;
ip6_frags.qsize = sizeof(struct frag_queue);
ip6_frags.frag_expire = ip6_frag_expire;
@@ -771,8 +592,8 @@ err_protocol:
void ipv6_frag_exit(void)
{
- inet_frags_fini(&ip6_frags);
ip6_frags_sysctl_unregister();
unregister_pernet_subsys(&ip6_frags_ops);
inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ inet_frags_fini(&ip6_frags);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 27c93baed708..2c4743f2d50e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3069,6 +3069,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
+ cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
nhn++;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index c9c6a5e829ab..16eba7b5f1a9 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -661,6 +661,10 @@ static int ipip6_rcv(struct sk_buff *skb)
!net_eq(tunnel->net, dev_net(tunnel->dev))))
goto out;
+ /* skb can be uncloned in iptunnel_pull_header, so
+ * old iph is no longer valid
+ */
+ iph = (const struct iphdr *)skb_mac_header(skb);
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -1065,7 +1069,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
if (!tdev && tunnel->parms.link)
tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
- if (tdev) {
+ if (tdev && !netif_is_l3_master(tdev)) {
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
@@ -1852,7 +1856,6 @@ static int __net_init sit_init_net(struct net *net)
err_reg_dev:
ipip6_dev_free(sitn->fb_tunnel_dev);
- free_netdev(sitn->fb_tunnel_dev);
err_alloc_dev:
return err;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0a69d39880f2..4953466cf98f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1056,11 +1056,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
- newnp->mcast_oif = tcp_v6_iif(skb);
- newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
- newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
+ newnp->mcast_oif = inet_iif(skb);
+ newnp->mcast_hops = ip_hdr(skb)->ttl;
+ newnp->rcv_flowinfo = 0;
if (np->repflow)
- newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
+ newnp->flow_label = 0;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks count
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4db5f541bca6..6a397e110b46 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -294,7 +294,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
- &udp_table, skb);
+ &udp_table, NULL);
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
@@ -479,7 +479,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct net *net = dev_net(skb->dev);
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- inet6_iif(skb), udptable, skb);
+ inet6_iif(skb), udptable, NULL);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 3a2701d42f47..07b7b2540579 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -391,6 +391,10 @@ static void __exit xfrm6_tunnel_fini(void)
xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6);
xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
unregister_pernet_subsys(&xfrm6_tunnel_net_ops);
+ /* Someone maybe has gotten the xfrm6_tunnel_spi.
+ * So need to wait it.
+ */
+ rcu_barrier();
kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
}