diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrconf.c | 21 | ||||
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 3 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 7 | ||||
-rw-r--r-- | net/ipv6/ip6_flowlabel.c | 29 | ||||
-rw-r--r-- | net/ipv6/ip6_input.c | 10 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 4 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 16 | ||||
-rw-r--r-- | net/ipv6/ip6_vti.c | 33 | ||||
-rw-r--r-- | net/ipv6/ip6mr.c | 11 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 10 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 5 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_conntrack_reasm.c | 281 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 3 | ||||
-rw-r--r-- | net/ipv6/output_core.c | 30 | ||||
-rw-r--r-- | net/ipv6/ping.c | 2 | ||||
-rw-r--r-- | net/ipv6/raw.c | 27 | ||||
-rw-r--r-- | net/ipv6/reassembly.c | 363 | ||||
-rw-r--r-- | net/ipv6/route.c | 1 | ||||
-rw-r--r-- | net/ipv6/sit.c | 7 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 8 | ||||
-rw-r--r-- | net/ipv6/udp.c | 4 | ||||
-rw-r--r-- | net/ipv6/xfrm6_tunnel.c | 4 |
22 files changed, 344 insertions, 535 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4ce7f9195151..a4c00242a90b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3189,6 +3189,10 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_6LOWPAN) && (dev->type != ARPHRD_NONE)) { /* Alas, we support only Ethernet autoconfiguration. */ + idev = __in6_dev_get(dev); + if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP && + dev->flags & IFF_MULTICAST) + ipv6_mc_up(idev); return; } @@ -5443,13 +5447,20 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: /* - * If the address was optimistic - * we inserted the route at the start of - * our DAD process, so we don't need - * to do it again + * If the address was optimistic we inserted the route at the + * start of our DAD process, so we don't need to do it again. + * If the device was taken down in the middle of the DAD + * cycle there is a race where we could get here without a + * host route, so nothing to insert. That will be fixed when + * the device is brought up. */ - if (!rcu_access_pointer(ifp->rt->rt6i_node)) + if (ifp->rt && !rcu_access_pointer(ifp->rt->rt6i_node)) { ip6_ins_rt(ifp->rt); + } else if (!ifp->rt && (ifp->idev->dev->flags & IFF_UP)) { + pr_warn("BUG: Address %pI6c on device %s is missing its host route.\n", + &ifp->addr, ifp->idev->dev->name); + } + if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); if (!ipv6_addr_any(&ifp->peer_addr)) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 02761c9fe43e..d47cab6d7c6d 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -133,9 +133,10 @@ struct sock *inet6_lookup_listener(struct net *net, int score, hiscore = 0, matches = 0, reuseport = 0; bool exact_dif = inet6_exact_dif_match(net, skb); struct sock *sk, *result = NULL; + struct hlist_nulls_node *node; u32 phash = 0; - sk_for_each(sk, &ilb->head) { + sk_nulls_for_each(sk, node, &ilb->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5da864997495..85c7e250c7a8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -784,8 +784,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, found++; break; } - if (rt_can_ecmp) - fallback_ins = fallback_ins ?: ins; + fallback_ins = fallback_ins ?: ins; goto next_iter; } @@ -825,7 +824,9 @@ next_iter: } if (fallback_ins && !found) { - /* No ECMP-able route found, replace first non-ECMP one */ + /* No matching route with same ecmp-able-ness found, replace + * first matching route + */ ins = fallback_ins; iter = *ins; found++; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b82e439804d1..6a6011160f18 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) return fl; } +static void fl_free_rcu(struct rcu_head *head) +{ + struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); + + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); + kfree(fl->opt); + kfree(fl); +} + static void fl_free(struct ip6_flowlabel *fl) { - if (fl) { - if (fl->share == IPV6_FL_S_PROCESS) - put_pid(fl->owner.pid); - kfree(fl->opt); - kfree_rcu(fl, rcu); - } + if (fl) + call_rcu(&fl->rcu, fl_free_rcu); } static void fl_release(struct ip6_flowlabel *fl) @@ -248,9 +254,9 @@ struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) rcu_read_lock_bh(); for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; - if (fl->label == label) { + + if (fl->label == label && atomic_inc_not_zero(&fl->users)) { fl->lastuse = jiffies; - atomic_inc(&fl->users); rcu_read_unlock_bh(); return fl; } @@ -617,7 +623,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) goto done; } fl1 = sfl->fl; - atomic_inc(&fl1->users); + if (!atomic_inc_not_zero(&fl1->users)) + fl1 = NULL; break; } } @@ -634,9 +641,9 @@ recheck: if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || ((fl1->share == IPV6_FL_S_PROCESS) && - (fl1->owner.pid == fl->owner.pid)) || + (fl1->owner.pid != fl->owner.pid)) || ((fl1->share == IPV6_FL_S_USER) && - uid_eq(fl1->owner.uid, fl->owner.uid))) + !uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -ENOMEM; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aacfb4bce153..e726a61ae6dc 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -168,6 +168,16 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (ipv6_addr_is_multicast(&hdr->saddr)) goto err; + /* While RFC4291 is not explicit about v4mapped addresses + * in IPv6 headers, it seems clear linux dual-stack + * model can not deal properly with these. + * Security models could be fooled by ::ffff:127.0.0.1 for example. + * + * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02 + */ + if (ipv6_addr_v4mapped(&hdr->saddr)) + goto err; + skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b723987761be..11407dd6bc7c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -592,7 +592,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; - unsigned int mtu, hlen, left, len; + unsigned int mtu, hlen, left, len, nexthdr_offset; int hroom, troom; __be32 frag_id; int ptr, offset = 0, err = 0; @@ -603,6 +603,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, goto fail; hlen = err; nexthdr = *prevhdr; + nexthdr_offset = prevhdr - skb_network_header(skb); mtu = ip6_skb_dst_mtu(skb); @@ -637,6 +638,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, (err = skb_checksum_help(skb))) goto fail; + prevhdr = skb_network_header(skb) + nexthdr_offset; hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f89516d04150..ed795d50a8d2 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -634,7 +634,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); if (IS_ERR(rt) || - rt->dst.dev->type != ARPHRD_TUNNEL) { + rt->dst.dev->type != ARPHRD_TUNNEL6) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; @@ -644,7 +644,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ip_rt_put(rt); if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) + skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } @@ -1275,11 +1275,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); - skb_set_inner_ipproto(skb, IPPROTO_IPIP); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, @@ -1362,11 +1362,11 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); - skb_set_inner_ipproto(skb, IPPROTO_IPV6); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, @@ -1861,10 +1861,8 @@ static int ip6_tnl_dev_init(struct net_device *dev) if (err) return err; ip6_tnl_link_config(t); - if (t->parms.collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; + if (t->parms.collect_md) netif_keep_dst(dev); - } return 0; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index c2b2ee71fc6c..b9f5155a77ef 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -315,7 +315,7 @@ static int vti6_rcv(struct sk_buff *skb) if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { rcu_read_unlock(); - return 0; + goto discard; } ipv6h = ipv6_hdr(skb); @@ -453,8 +453,35 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) int err = -1; int mtu; - if (!dst) - goto tx_err_link_failure; + if (!dst) { + switch (skb->protocol) { + case htons(ETH_P_IP): { + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) + goto tx_err_link_failure; + dst = &rt->dst; + skb_dst_set(skb, dst); + break; + } + case htons(ETH_P_IPV6): + fl->u.ip6.flowi6_oif = dev->ifindex; + fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); + if (dst->error) { + dst_release(dst); + dst = NULL; + goto tx_err_link_failure; + } + skb_dst_set(skb, dst); + break; + default: + goto tx_err_link_failure; + } + } dst_hold(dst); dst = xfrm_lookup(t->net, dst, fl, NULL, 0); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 41f67629ae59..f38b22f54c09 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1668,6 +1668,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) return -ENOENT; @@ -1679,9 +1683,6 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns switch (optname) { case MRT6_INIT: - if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; if (optlen < sizeof(int)) return -EINVAL; @@ -1818,6 +1819,10 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) return -ENOENT; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 81fd35ed8732..1080770b5eaf 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -184,9 +184,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = -EBUSY; break; } - } else if (sk->sk_protocol != IPPROTO_TCP) + } else if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_prot != &tcpv6_prot) { + retv = -EBUSY; + break; + } break; - + } else { + break; + } if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 40262abb15db..e065d48b31b9 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -772,12 +772,13 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) im->idev = pmc->idev; im->mca_crcount = idev->mc_qrv; if (im->mca_sfmode == MCAST_INCLUDE) { - im->mca_tomb = pmc->mca_tomb; - im->mca_sources = pmc->mca_sources; + swap(im->mca_tomb, pmc->mca_tomb); + swap(im->mca_sources, pmc->mca_sources); for (psf = im->mca_sources; psf; psf = psf->sf_next) psf->sf_crcount = im->mca_crcount; } in6_dev_put(pmc->idev); + ip6_mc_clear_src(pmc); kfree(pmc); } spin_unlock_bh(&im->mca_lock); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e46185377981..0b53d1907e4a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -33,9 +33,8 @@ #include <net/sock.h> #include <net/snmp.h> -#include <net/inet_frag.h> +#include <net/ipv6_frag.h> -#include <net/ipv6.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/rawv6.h> @@ -52,14 +51,6 @@ static const char nf_frags_cache_name[] = "nf-frags"; -struct nf_ct_frag6_skb_cb -{ - struct inet6_skb_parm h; - int offset; -}; - -#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb)) - static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL @@ -145,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) } #endif +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); + static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); @@ -158,7 +152,7 @@ static void nf_ct_frag6_expire(unsigned long data) fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); net = container_of(fq->q.net, struct net, nf_frag.frags); - ip6_expire_frag_queue(net, fq); + ip6frag_expire_frag_queue(net, fq); } /* Creation primitives. */ @@ -185,9 +179,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { - struct sk_buff *prev, *next; unsigned int payload_len; - int offset, end; + struct net_device *dev; + struct sk_buff *prev; + int offset, end, err; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) { @@ -262,55 +257,25 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, goto err; } - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ + /* Note : skb->rbnode and skb->dev share the same location. */ + dev = skb->dev; + /* Makes sure compiler wont do silly aliasing games */ + barrier(); + prev = fq->q.fragments_tail; - if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (NFCT_FRAG6_CB(next)->offset >= offset) - break; /* bingo! */ - prev = next; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) { + if (err == IPFRAG_DUP) { + /* No error for duplicates, pretend they got queued. */ + kfree_skb(skb); + return -EINPROGRESS; + } + goto insert_error; } -found: - /* RFC5722, Section 4: - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments, including those not yet received) MUST be silently - * discarded. - */ + if (dev) + fq->iif = dev->ifindex; - /* Check for overlap with preceding fragment. */ - if (prev && - (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset) - goto discard_fq; - - /* Look for overlap with succeeding segment. */ - if (next && NFCT_FRAG6_CB(next)->offset < end) - goto discard_fq; - - NFCT_FRAG6_CB(skb)->offset = offset; - - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; - - if (skb->dev) { - fq->iif = skb->dev->ifindex; - skb->dev = NULL; - } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; @@ -326,11 +291,27 @@ found: fq->q.flags |= INET_FRAG_FIRST_IN; } - return 0; + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) { + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + err = nf_ct_frag6_reasm(fq, skb, prev, dev); + skb->_skb_refdst = orefdst; -discard_fq: + /* After queue has assumed skb ownership, only 0 or + * -EINPROGRESS must be returned. + */ + return err ? -EINPROGRESS : 0; + } + + skb_dst_drop(skb); + return -EINPROGRESS; + +insert_error: inet_frag_kill(&fq->q); err: + skb_dst_drop(skb); return -EINVAL; } @@ -340,141 +321,67 @@ err: * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. - * - * returns true if *prev skb has been transformed into the reassembled - * skb, false otherwise. */ -static bool -nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { - struct sk_buff *fp, *head = fq->q.fragments; - int payload_len; + void *reasm_data; + int payload_len; u8 ecn; inet_frag_kill(&fq->q); - WARN_ON(head == NULL); - WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); - ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) - return false; + goto err; - /* Unfragmented part is taken from the first segment. */ - payload_len = ((head->data - skb_network_header(head)) - + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) + goto err; + + payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", payload_len); - return false; - } - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - return false; - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (clone == NULL) - return false; - - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - - add_frag_mem_limit(fq->q.net, clone->truesize); - } - - /* morph head into last received skb: prev. - * - * This allows callers of ipv6 conntrack defrag to continue - * to use the last skb(frag) passed into the reasm engine. - * The last skb frag 'silently' turns into the full reassembled skb. - * - * Since prev is also part of q->fragments we have to clone it first. - */ - if (head != prev) { - struct sk_buff *iter; - - fp = skb_clone(prev, GFP_ATOMIC); - if (!fp) - return false; - - fp->next = prev->next; - - iter = head; - while (iter) { - if (iter->next == prev) { - iter->next = fp; - break; - } - iter = iter->next; - } - - skb_morph(prev, head); - prev->next = head->next; - consume_skb(head); - head = prev; + goto err; } /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ - skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, - (head->data - head->head) - sizeof(struct frag_hdr)); - head->mac_header += sizeof(struct frag_hdr); - head->network_header += sizeof(struct frag_hdr); - - skb_shinfo(head)->frag_list = head->next; - skb_reset_transport_header(head); - skb_push(head, head->data - skb_network_header(head)); - - for (fp = head->next; fp; fp = fp->next) { - head->data_len += fp->len; - head->len += fp->len; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - fp->sk = NULL; - } - sub_frag_mem_limit(fq->q.net, head->truesize); + skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0]; + memmove(skb->head + sizeof(struct frag_hdr), skb->head, + (skb->data - skb->head) - sizeof(struct frag_hdr)); + skb->mac_header += sizeof(struct frag_hdr); + skb->network_header += sizeof(struct frag_hdr); + + skb_reset_transport_header(skb); - head->ignore_df = 1; - head->next = NULL; - head->dev = dev; - head->tstamp = fq->q.stamp; - ipv6_hdr(head)->payload_len = htons(payload_len); - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); - IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; + inet_frag_reasm_finish(&fq->q, skb, reasm_data); + + skb->ignore_df = 1; + skb->dev = dev; + ipv6_hdr(skb)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); + IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_partial(skb_network_header(skb), + skb_network_header_len(skb), + skb->csum); fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; + + return 0; - return true; +err: + inet_frag_kill(&fq->q); + return -EINVAL; } /* @@ -543,7 +450,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { u16 savethdr = skb->transport_header; - struct net_device *dev = skb->dev; int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -566,10 +472,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && - fhdr->frag_off & htons(IP6_MF)) - return -EINVAL; - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); @@ -581,24 +483,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) spin_lock_bh(&fq->q.lock); ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); - if (ret < 0) { - if (ret == -EPROTO) { - skb->transport_header = savethdr; - ret = 0; - } - goto out_unlock; - } - - /* after queue has assumed skb ownership, only 0 or -EINPROGRESS - * must be returned. - */ - ret = -EINPROGRESS; - if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len && - nf_ct_frag6_reasm(fq, skb, dev)) + if (ret == -EPROTO) { + skb->transport_header = savethdr; ret = 0; + } -out_unlock: spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q); return ret; @@ -634,16 +523,24 @@ static struct pernet_operations nf_ct_net_ops = { .exit = nf_ct_net_exit, }; +static const struct rhashtable_params nfct_rhash_params = { + .head_offset = offsetof(struct inet_frag_queue, node), + .hashfn = ip6frag_key_hashfn, + .obj_hashfn = ip6frag_obj_hashfn, + .obj_cmpfn = ip6frag_obj_cmpfn, + .automatic_shrinking = true, +}; + int nf_ct_frag6_init(void) { int ret = 0; - nf_frags.constructor = ip6_frag_init; + nf_frags.constructor = ip6frag_init; nf_frags.destructor = NULL; nf_frags.qsize = sizeof(struct frag_queue); nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.frags_cache_name = nf_frags_cache_name; - nf_frags.rhash_params = ip6_rhash_params; + nf_frags.rhash_params = nfct_rhash_params; ret = inet_frags_init(&nf_frags); if (ret) goto out; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index f06b0471f39f..c4070e9c4260 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -14,8 +14,7 @@ #include <linux/skbuff.h> #include <linux/icmp.h> #include <linux/sysctl.h> -#include <net/ipv6.h> -#include <net/inet_frag.h> +#include <net/ipv6_frag.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index a338bbc33cf3..6a6d01cb1ace 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -10,15 +10,25 @@ #include <net/secure_seq.h> #include <linux/netfilter.h> -static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, +static u32 __ipv6_select_ident(struct net *net, const struct in6_addr *dst, const struct in6_addr *src) { + const struct { + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + .src = *src, + }; u32 hash, id; - hash = __ipv6_addr_jhash(dst, hashrnd); - hash = __ipv6_addr_jhash(src, hash); - hash ^= net_hash_mix(net); + /* Note the following code is not safe, but this is okay. */ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); + + hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key); /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, * set the hight order instead thus minimizing possible future @@ -41,7 +51,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, */ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { - static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; struct in6_addr *addrs; u32 id; @@ -53,11 +62,7 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) if (!addrs) return; - net_get_random_once(&ip6_proxy_idents_hashrnd, - sizeof(ip6_proxy_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, - &addrs[1], &addrs[0]); + id = __ipv6_select_ident(net, &addrs[1], &addrs[0]); skb_shinfo(skb)->ip6_frag_id = htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); @@ -66,12 +71,9 @@ __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) { - static u32 ip6_idents_hashrnd __read_mostly; u32 id; - net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr); + id = __ipv6_select_ident(net, daddr, saddr); return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 982868193dbb..e209ae19fe78 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -239,7 +239,7 @@ static int __net_init ping_v6_proc_init_net(struct net *net) return ping_proc_register(net, &ping_v6_seq_afinfo); } -static void __net_init ping_v6_proc_exit_net(struct net *net) +static void __net_exit ping_v6_proc_exit_net(struct net *net) { return ping_proc_unregister(net, &ping_v6_seq_afinfo); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a4f979ff31b9..301978df650e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -283,7 +283,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) goto out_unlock; + } + if (sk->sk_bound_dev_if) { err = -ENODEV; dev = dev_get_by_index_rcu(sock_net(sk), sk->sk_bound_dev_if); @@ -772,6 +774,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct sockcm_cookie sockc; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; + int hdrincl; u16 proto; int err; @@ -785,6 +788,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + /* hdrincl should be READ_ONCE(inet->hdrincl) + * but READ_ONCE() doesn't work with bit fields. + * Doing this indirectly yields the same result. + */ + hdrincl = inet->hdrincl; + hdrincl = READ_ONCE(hdrincl); + /* * Get and verify the address. */ @@ -878,11 +888,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; - rfv.msg = msg; - rfv.hlen = 0; - err = rawv6_probe_proto_opt(&rfv, &fl6); - if (err) - goto out; + + if (!hdrincl) { + rfv.msg = msg; + rfv.hlen = 0; + err = rawv6_probe_proto_opt(&rfv, &fl6); + if (err) + goto out; + } if (!ipv6_addr_any(daddr)) fl6.daddr = *daddr; @@ -899,7 +912,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - if (inet->hdrincl) + if (hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; if (ipc6.tclass < 0) @@ -922,7 +935,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto do_confirm; back_from_confirm: - if (inet->hdrincl) + if (hdrincl) err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags); else { ipc6.opt = opt; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 74ffbcb306a6..3f488555999e 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -57,18 +57,11 @@ #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> -#include <net/inet_frag.h> +#include <net/ipv6_frag.h> #include <net/inet_ecn.h> static const char ip6_frag_cache_name[] = "ip6-frags"; -struct ip6frag_skb_cb { - struct inet6_skb_parm h; - int offset; -}; - -#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb)) - static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); @@ -76,63 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) static struct inet_frags ip6_frags; -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev); - -void ip6_frag_init(struct inet_frag_queue *q, const void *a) -{ - struct frag_queue *fq = container_of(q, struct frag_queue, q); - const struct frag_v6_compare_key *key = a; - - q->key.v6 = *key; - fq->ecn = 0; -} -EXPORT_SYMBOL(ip6_frag_init); - -void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) -{ - struct net_device *dev = NULL; - struct sk_buff *head; - - rcu_read_lock(); - spin_lock(&fq->q.lock); - - if (fq->q.flags & INET_FRAG_COMPLETE) - goto out; - - inet_frag_kill(&fq->q); - - dev = dev_get_by_index_rcu(net, fq->iif); - if (!dev) - goto out; - - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); - - /* Don't send error if the first segment did not arrive. */ - head = fq->q.fragments; - if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) - goto out; - - /* But use as source device on which LAST ARRIVED - * segment was received. And do not use fq->dev - * pointer directly, device might already disappeared. - */ - head->dev = dev; - skb_get(head); - spin_unlock(&fq->q.lock); - - icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); - kfree_skb(head); - goto out_rcu_unlock; - -out: - spin_unlock(&fq->q.lock); -out_rcu_unlock: - rcu_read_unlock(); - inet_frag_put(&fq->q); -} -EXPORT_SYMBOL(ip6_expire_frag_queue); +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); static void ip6_frag_expire(unsigned long data) { @@ -142,7 +80,7 @@ static void ip6_frag_expire(unsigned long data) fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); net = container_of(fq->q.net, struct net, ipv6.frags); - ip6_expire_frag_queue(net, fq); + ip6frag_expire_frag_queue(net, fq); } static struct frag_queue * @@ -169,27 +107,29 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) } static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, - struct frag_hdr *fhdr, int nhoff) + struct frag_hdr *fhdr, int nhoff, + u32 *prob_offset) { - struct sk_buff *prev, *next; - struct net_device *dev; - int offset, end; struct net *net = dev_net(skb_dst(skb)->dev); + int offset, end, fragsize; + struct sk_buff *prev_tail; + struct net_device *dev; + int err = -ENOENT; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) goto err; + err = -EINVAL; offset = ntohs(fhdr->frag_off) & ~0x7; end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - ((u8 *)&fhdr->frag_off - - skb_network_header(skb))); + *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); + /* note that if prob_offset is set, the skb is freed elsewhere, + * we do not free it here. + */ return -1; } @@ -209,7 +149,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, */ if (end < fq->q.len || ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) - goto err; + goto discard_fq; fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { @@ -220,84 +160,51 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - offsetof(struct ipv6hdr, payload_len)); + *prob_offset = offsetof(struct ipv6hdr, payload_len); return -1; } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ if (fq->q.flags & INET_FRAG_LAST_IN) - goto err; + goto discard_fq; fq->q.len = end; } } if (end == offset) - goto err; + goto discard_fq; + err = -ENOMEM; /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) - goto err; - - if (pskb_trim_rcsum(skb, end - offset)) - goto err; - - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = fq->q.fragments_tail; - if (!prev || FRAG6_CB(prev)->offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (FRAG6_CB(next)->offset >= offset) - break; /* bingo! */ - prev = next; - } - -found: - /* RFC5722, Section 4, amended by Errata ID : 3089 - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments) MUST be silently discarded. - */ - - /* Check for overlap with preceding fragment. */ - if (prev && - (FRAG6_CB(prev)->offset + prev->len) > offset) goto discard_fq; - /* Look for overlap with succeeding segment. */ - if (next && FRAG6_CB(next)->offset < end) + err = pskb_trim_rcsum(skb, end - offset); + if (err) goto discard_fq; - FRAG6_CB(skb)->offset = offset; + /* Note : skb->rbnode and skb->dev share the same location. */ + dev = skb->dev; + /* Makes sure compiler wont do silly aliasing games */ + barrier(); - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; + prev_tail = fq->q.fragments_tail; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) + goto insert_error; - dev = skb->dev; - if (dev) { + if (dev) fq->iif = dev->ifindex; - skb->dev = NULL; - } + fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; add_frag_mem_limit(fq->q.net, skb->truesize); + fragsize = -skb_network_offset(skb) + skb->len; + if (fragsize > fq->q.max_size) + fq->q.max_size = fragsize; + /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ @@ -308,44 +215,48 @@ found: if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { - int res; unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - res = ip6_frag_reasm(fq, prev, dev); + err = ip6_frag_reasm(fq, skb, prev_tail, dev); skb->_skb_refdst = orefdst; - return res; + return err; } skb_dst_drop(skb); - return -1; + return -EINPROGRESS; +insert_error: + if (err == IPFRAG_DUP) { + kfree_skb(skb); + return -EINVAL; + } + err = -EINVAL; + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASM_OVERLAPS); discard_fq: inet_frag_kill(&fq->q); -err: __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); +err: kfree_skb(skb); - return -1; + return err; } /* * Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev) +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { struct net *net = container_of(fq->q.net, struct net, ipv6.frags); - struct sk_buff *fp, *head = fq->q.fragments; - int payload_len; unsigned int nhoff; - int sum_truesize; + void *reasm_data; + int payload_len; u8 ecn; inet_frag_kill(&fq->q); @@ -354,113 +265,40 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, if (unlikely(ecn == 0xff)) goto out_fail; - /* Make the one we just received the head. */ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); - - if (!fp) - goto out_oom; - - fp->next = head->next; - if (!fp->next) - fq->q.fragments_tail = fp; - prev->next = fp; - - skb_morph(head, fq->q.fragments); - head->next = fq->q.fragments->next; - - consume_skb(fq->q.fragments); - fq->q.fragments = head; - } - - WARN_ON(head == NULL); - WARN_ON(FRAG6_CB(head)->offset != 0); + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) + goto out_oom; - /* Unfragmented part is taken from the first segment. */ - payload_len = ((head->data - skb_network_header(head)) - + payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) goto out_oversize; - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - goto out_oom; - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (!clone) - goto out_oom; - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - add_frag_mem_limit(fq->q.net, clone->truesize); - } - /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ nhoff = fq->nhoffset; - skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, - (head->data - head->head) - sizeof(struct frag_hdr)); - if (skb_mac_header_was_set(head)) - head->mac_header += sizeof(struct frag_hdr); - head->network_header += sizeof(struct frag_hdr); - - skb_reset_transport_header(head); - skb_push(head, head->data - skb_network_header(head)); - - sum_truesize = head->truesize; - for (fp = head->next; fp;) { - bool headstolen; - int delta; - struct sk_buff *next = fp->next; - - sum_truesize += fp->truesize; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - - if (skb_try_coalesce(head, fp, &headstolen, &delta)) { - kfree_skb_partial(fp, headstolen); - } else { - if (!skb_shinfo(head)->frag_list) - skb_shinfo(head)->frag_list = fp; - head->data_len += fp->len; - head->len += fp->len; - head->truesize += fp->truesize; - } - fp = next; - } - sub_frag_mem_limit(fq->q.net, sum_truesize); + skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0]; + memmove(skb->head + sizeof(struct frag_hdr), skb->head, + (skb->data - skb->head) - sizeof(struct frag_hdr)); + if (skb_mac_header_was_set(skb)) + skb->mac_header += sizeof(struct frag_hdr); + skb->network_header += sizeof(struct frag_hdr); + + skb_reset_transport_header(skb); - head->next = NULL; - head->dev = dev; - head->tstamp = fq->q.stamp; - ipv6_hdr(head)->payload_len = htons(payload_len); - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); - IP6CB(head)->nhoff = nhoff; - IP6CB(head)->flags |= IP6SKB_FRAGMENTED; + inet_frag_reasm_finish(&fq->q, skb, reasm_data); + + skb->dev = dev; + ipv6_hdr(skb)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); + IP6CB(skb)->nhoff = nhoff; + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; + IP6CB(skb)->frag_max_size = fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ - skb_postpush_rcsum(head, skb_network_header(head), - skb_network_header_len(head)); + skb_postpush_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); @@ -468,6 +306,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; return 1; out_oversize: @@ -479,6 +318,7 @@ out_fail: rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); + inet_frag_kill(&fq->q); return -1; } @@ -517,22 +357,26 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && - fhdr->frag_off & htons(IP6_MF)) - goto fail_hdr; - iif = skb->dev ? skb->dev->ifindex : 0; fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { + u32 prob_offset = 0; int ret; spin_lock(&fq->q.lock); fq->iif = iif; - ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); + ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff, + &prob_offset); spin_unlock(&fq->q.lock); inet_frag_put(&fq->q); + if (prob_offset) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + /* icmpv6_param_prob() calls kfree_skb(skb) */ + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset); + } return ret; } @@ -700,42 +544,19 @@ static struct pernet_operations ip6_frags_ops = { .exit = ipv6_frags_exit_net, }; -static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed) -{ - return jhash2(data, - sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); -} - -static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed) -{ - const struct inet_frag_queue *fq = data; - - return jhash2((const u32 *)&fq->key.v6, - sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); -} - -static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) -{ - const struct frag_v6_compare_key *key = arg->key; - const struct inet_frag_queue *fq = ptr; - - return !!memcmp(&fq->key, key, sizeof(*key)); -} - -const struct rhashtable_params ip6_rhash_params = { +static const struct rhashtable_params ip6_rhash_params = { .head_offset = offsetof(struct inet_frag_queue, node), - .hashfn = ip6_key_hashfn, - .obj_hashfn = ip6_obj_hashfn, - .obj_cmpfn = ip6_obj_cmpfn, + .hashfn = ip6frag_key_hashfn, + .obj_hashfn = ip6frag_obj_hashfn, + .obj_cmpfn = ip6frag_obj_cmpfn, .automatic_shrinking = true, }; -EXPORT_SYMBOL(ip6_rhash_params); int __init ipv6_frag_init(void) { int ret; - ip6_frags.constructor = ip6_frag_init; + ip6_frags.constructor = ip6frag_init; ip6_frags.destructor = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.frag_expire = ip6_frag_expire; @@ -771,8 +592,8 @@ err_protocol: void ipv6_frag_exit(void) { - inet_frags_fini(&ip6_frags); ip6_frags_sysctl_unregister(); unregister_pernet_subsys(&ip6_frags_ops); inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); + inet_frags_fini(&ip6_frags); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 27c93baed708..2c4743f2d50e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3069,6 +3069,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; nhn++; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c9c6a5e829ab..16eba7b5f1a9 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -661,6 +661,10 @@ static int ipip6_rcv(struct sk_buff *skb) !net_eq(tunnel->net, dev_net(tunnel->dev)))) goto out; + /* skb can be uncloned in iptunnel_pull_header, so + * old iph is no longer valid + */ + iph = (const struct iphdr *)skb_mac_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -1065,7 +1069,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); - if (tdev) { + if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); @@ -1852,7 +1856,6 @@ static int __net_init sit_init_net(struct net *net) err_reg_dev: ipip6_dev_free(sitn->fb_tunnel_dev); - free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a69d39880f2..4953466cf98f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1056,11 +1056,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; + newnp->rcv_flowinfo = 0; if (np->repflow) - newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); + newnp->flow_label = 0; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4db5f541bca6..6a397e110b46 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -294,7 +294,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - &udp_table, skb); + &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); @@ -479,7 +479,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), udptable, skb); + inet6_iif(skb), udptable, NULL); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 3a2701d42f47..07b7b2540579 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -391,6 +391,10 @@ static void __exit xfrm6_tunnel_fini(void) xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); unregister_pernet_subsys(&xfrm6_tunnel_net_ops); + /* Someone maybe has gotten the xfrm6_tunnel_spi. + * So need to wait it. + */ + rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); } |