diff options
Diffstat (limited to 'net')
183 files changed, 1817 insertions, 742 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index cd7c0429cddf..796d95797ab4 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -177,9 +177,6 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) if (err) goto out_unregister_netdev; - /* Account for reference in struct vlan_dev_priv */ - dev_hold(real_dev); - vlan_stacked_transfer_operstate(real_dev, dev, vlan); linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 415a29d42cdf..589615ec490b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -583,6 +583,9 @@ static int vlan_dev_init(struct net_device *dev) if (!vlan->vlan_pcpu_stats) return -ENOMEM; + /* Get vlan's reference to real_dev */ + dev_hold(real_dev); + return 0; } diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 44e6c74ed428..2779ec1053a0 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -301,9 +301,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) ref = priv->rings[i].intf->ref[j]; gnttab_end_foreign_access(ref, 0, 0); } - free_pages((unsigned long)priv->rings[i].data.in, - XEN_9PFS_RING_ORDER - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(priv->rings[i].data.in, + 1UL << (XEN_9PFS_RING_ORDER + + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); free_page((unsigned long)priv->rings[i].intf); @@ -341,8 +341,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (ret < 0) goto out; ring->ref = ret; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + bytes = alloc_pages_exact(1UL << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT), + GFP_KERNEL | __GFP_ZERO); if (!bytes) { ret = -ENOMEM; goto out; @@ -373,9 +373,7 @@ out: if (bytes) { for (i--; i >= 0; i--) gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); - free_pages((unsigned long)bytes, - XEN_9PFS_RING_ORDER - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(bytes, 1UL << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(ring->ref, 0, 0); free_page((unsigned long)ring->intf); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2fdb1b573e8c..aff991ca0e4a 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; + struct sock *sk; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; @@ -85,11 +86,26 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { - s->ax25_dev = NULL; + sk = s->sk; + if (!sk) { + spin_unlock_bh(&ax25_list_lock); + ax25_disconnect(s, ENETUNREACH); + s->ax25_dev = NULL; + spin_lock_bh(&ax25_list_lock); + goto again; + } + sock_hold(sk); spin_unlock_bh(&ax25_list_lock); + lock_sock(sk); ax25_disconnect(s, ENETUNREACH); + s->ax25_dev = NULL; + if (sk->sk_socket) { + dev_put(ax25_dev->dev); + ax25_dev_put(ax25_dev); + } + release_sock(sk); spin_lock_bh(&ax25_list_lock); - + sock_put(sk); /* The entry could have been deleted from the * list meanwhile and thus the next pointer is * no longer valid. Play it safe and restart @@ -353,21 +369,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl))) return -EFAULT; - if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL) - return -ENODEV; - if (ax25_ctl.digi_count > AX25_MAX_DIGIS) return -EINVAL; if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr); + if (!ax25_dev) + return -ENODEV; + digi.ndigi = ax25_ctl.digi_count; for (k = 0; k < digi.ndigi; k++) digi.calls[k] = ax25_ctl.digi_addr[k]; - if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL) + ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev); + if (!ax25) { + ax25_dev_put(ax25_dev); return -ENOTCONN; + } switch (ax25_ctl.cmd) { case AX25_KILL: @@ -434,6 +454,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) } out_put: + ax25_dev_put(ax25_dev); ax25_cb_put(ax25); return ret; @@ -959,14 +980,16 @@ static int ax25_release(struct socket *sock) { struct sock *sk = sock->sk; ax25_cb *ax25; + ax25_dev *ax25_dev; if (sk == NULL) return 0; sock_hold(sk); - sock_orphan(sk); lock_sock(sk); + sock_orphan(sk); ax25 = sk_to_ax25(sk); + ax25_dev = ax25->ax25_dev; if (sk->sk_type == SOCK_SEQPACKET) { switch (ax25->state) { @@ -1028,6 +1051,15 @@ static int ax25_release(struct socket *sock) sk->sk_state_change(sk); ax25_destroy_socket(ax25); } + if (ax25_dev) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); + dev_put(ax25_dev->dev); + ax25_dev_put(ax25_dev); + } sock->sk = NULL; release_sock(sk); @@ -1104,8 +1136,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } } - if (ax25_dev != NULL) + if (ax25_dev) { ax25_fillin_cb(ax25, ax25_dev); + dev_hold(ax25_dev->dev); + } done: ax25_cb_add(ax25); diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 4ac2e0847652..d2e0cc67d91a 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -37,6 +37,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; + ax25_dev_hold(ax25_dev); } spin_unlock_bh(&ax25_dev_lock); @@ -56,6 +57,7 @@ void ax25_dev_device_up(struct net_device *dev) return; } + refcount_set(&ax25_dev->refcount, 1); dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; dev_hold(dev); @@ -84,6 +86,7 @@ void ax25_dev_device_up(struct net_device *dev) ax25_dev->next = ax25_dev_list; ax25_dev_list = ax25_dev; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_hold(ax25_dev); ax25_register_dev_sysctl(ax25_dev); } @@ -113,9 +116,10 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put(dev); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } @@ -123,9 +127,10 @@ void ax25_dev_device_down(struct net_device *dev) if (s->next == ax25_dev) { s->next = ax25_dev->next; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put(dev); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } @@ -133,6 +138,7 @@ void ax25_dev_device_down(struct net_device *dev) } spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; + ax25_dev_put(ax25_dev); } int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) @@ -144,20 +150,32 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) switch (cmd) { case SIOCAX25ADDFWD: - if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL) + fwd_dev = ax25_addr_ax25dev(&fwd->port_to); + if (!fwd_dev) { + ax25_dev_put(ax25_dev); return -EINVAL; - if (ax25_dev->forward != NULL) + } + if (ax25_dev->forward) { + ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = fwd_dev->dev; + ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); break; case SIOCAX25DELFWD: - if (ax25_dev->forward == NULL) + if (!ax25_dev->forward) { + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = NULL; + ax25_dev_put(ax25_dev); break; default: + ax25_dev_put(ax25_dev); return -EINVAL; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index b40e0bce67ea..dc2168d2a32a 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -75,11 +75,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_dev *ax25_dev; int i; - if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL) - return -EINVAL; if (route->digi_count > AX25_MAX_DIGIS) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&route->port_addr); + if (!ax25_dev) + return -EINVAL; + write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; @@ -91,6 +93,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -101,6 +104,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } ax25_rt = ax25_rt->next; @@ -108,6 +112,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } @@ -120,6 +125,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); kfree(ax25_rt); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -132,6 +138,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->next = ax25_route_list; ax25_route_list = ax25_rt; write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -173,6 +180,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -215,6 +223,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) out: write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return err; } diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 15ab812c4fe4..3a476e4f6cd0 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -261,12 +261,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) - ax25_stop_heartbeat(ax25); - ax25_stop_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_stop_t3timer(ax25); - ax25_stop_idletimer(ax25); + if (reason == ENETUNREACH) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); + } else { + if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); + ax25_stop_t1timer(ax25); + ax25_stop_t2timer(ax25); + ax25_stop_t3timer(ax25); + ax25_stop_idletimer(ax25); + } ax25->state = AX25_STATE_0; diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 385fccdcf69d..0da90e73c79b 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -391,6 +391,7 @@ out: /** * batadv_frag_create() - create a fragment from skb + * @net_dev: outgoing device for fragment * @skb: skb to create fragment from * @frag_head: header to use in new fragment * @fragment_size: size of new fragment @@ -401,22 +402,25 @@ out: * * Return: the new fragment, NULL on error. */ -static struct sk_buff *batadv_frag_create(struct sk_buff *skb, +static struct sk_buff *batadv_frag_create(struct net_device *net_dev, + struct sk_buff *skb, struct batadv_frag_packet *frag_head, unsigned int fragment_size) { + unsigned int ll_reserved = LL_RESERVED_SPACE(net_dev); + unsigned int tailroom = net_dev->needed_tailroom; struct sk_buff *skb_fragment; unsigned int header_size = sizeof(*frag_head); unsigned int mtu = fragment_size + header_size; - skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); + skb_fragment = dev_alloc_skb(ll_reserved + mtu + tailroom); if (!skb_fragment) goto err; skb_fragment->priority = skb->priority; /* Eat the last mtu-bytes of the skb */ - skb_reserve(skb_fragment, header_size + ETH_HLEN); + skb_reserve(skb_fragment, ll_reserved + header_size); skb_split(skb, skb_fragment, skb->len - fragment_size); /* Add the header */ @@ -439,11 +443,12 @@ int batadv_frag_send_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) { + struct net_device *net_dev = neigh_node->if_incoming->net_dev; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_frag_packet frag_header; struct sk_buff *skb_fragment; - unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; + unsigned int mtu = net_dev->mtu; unsigned int header_size = sizeof(frag_header); unsigned int max_fragment_size, num_fragments; int ret; @@ -503,7 +508,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto put_primary_if; } - skb_fragment = batadv_frag_create(skb, &frag_header, + skb_fragment = batadv_frag_create(net_dev, skb, &frag_header, max_fragment_size); if (!skb_fragment) { ret = -ENOMEM; @@ -522,13 +527,14 @@ int batadv_frag_send_packet(struct sk_buff *skb, frag_header.no++; } - /* Make room for the fragment header. */ - if (batadv_skb_head_push(skb, header_size) < 0 || - pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) { - ret = -ENOMEM; + /* make sure that there is at least enough head for the fragmentation + * and ethernet headers + */ + ret = skb_cow_head(skb, ETH_HLEN + header_size); + if (ret < 0) goto put_primary_if; - } + skb_push(skb, header_size); memcpy(skb->data, &frag_header, header_size); /* Send the last fragment */ diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index afb52282d5bd..5f44c94ad707 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -151,22 +151,25 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) struct net *net = dev_net(net_dev); struct net_device *parent_dev; struct net *parent_net; + int iflink; bool ret; /* check if this is a batman-adv mesh interface */ if (batadv_softif_is_valid(net_dev)) return true; - /* no more parents..stop recursion */ - if (dev_get_iflink(net_dev) == 0 || - dev_get_iflink(net_dev) == net_dev->ifindex) + iflink = dev_get_iflink(net_dev); + if (iflink == 0) return false; parent_net = batadv_getlink_net(net_dev, net); + /* iflink to itself, most likely physical device */ + if (net == parent_net && iflink == net_dev->ifindex) + return false; + /* recurse over the parent device */ - parent_dev = __dev_get_by_index((struct net *)parent_net, - dev_get_iflink(net_dev)); + parent_dev = __dev_get_by_index((struct net *)parent_net, iflink); /* if we got a NULL parent_dev there is something broken.. */ if (!parent_dev) { pr_err("Cannot find parent device\n"); @@ -216,14 +219,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) struct net_device *real_netdev = NULL; struct net *real_net; struct net *net; - int ifindex; + int iflink; ASSERT_RTNL(); if (!netdev) return NULL; - if (netdev->ifindex == dev_get_iflink(netdev)) { + iflink = dev_get_iflink(netdev); + if (iflink == 0) { dev_hold(netdev); return netdev; } @@ -233,9 +237,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) goto out; net = dev_net(hard_iface->soft_iface); - ifindex = dev_get_iflink(netdev); real_net = batadv_getlink_net(netdev, net); - real_netdev = dev_get_by_index(real_net, ifindex); + + /* iflink to itself, most likely physical device */ + if (net == real_net && netdev->ifindex == iflink) { + real_netdev = netdev; + dev_hold(real_netdev); + goto out; + } + + real_netdev = dev_get_by_index(real_net, iflink); out: if (hard_iface) @@ -554,6 +565,9 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface) needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN); needed_headroom += batadv_max_header_len(); + /* fragmentation headers don't strip the unicast/... header */ + needed_headroom += sizeof(struct batadv_frag_packet); + soft_iface->needed_headroom = needed_headroom; soft_iface->needed_tailroom = lower_tailroom; } diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index f5bf931252c4..6f0a9f439233 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -136,7 +136,7 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get_ipv6(struct net_device *dev) { struct inet6_dev *in6_dev = __in6_dev_get(dev); - if (in6_dev && in6_dev->cnf.mc_forwarding) + if (in6_dev && atomic_read(&in6_dev->cnf.mc_forwarding)) return BATADV_NO_FLAGS; else return BATADV_MCAST_WANT_NO_RTR6; @@ -1373,6 +1373,7 @@ batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: The multicast packet to check * @orig: an originator to be set to forward the skb to + * @is_routable: stores whether the destination is routable * * Return: the forwarding mode as enum batadv_forw_mode and in case of * BATADV_FORW_SINGLE set the orig to the single originator the skb @@ -1380,17 +1381,16 @@ batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv, */ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **orig) + struct batadv_orig_node **orig, int *is_routable) { int ret, tt_count, ip_count, unsnoop_count, total_count; bool is_unsnoopable = false; unsigned int mcast_fanout; struct ethhdr *ethhdr; - int is_routable = 0; int rtr_count = 0; ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable, - &is_routable); + is_routable); if (ret == -ENOMEM) return BATADV_FORW_NONE; else if (ret < 0) @@ -1403,7 +1403,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr); unsnoop_count = !is_unsnoopable ? 0 : atomic_read(&bat_priv->mcast.num_want_all_unsnoopables); - rtr_count = batadv_mcast_forw_rtr_count(bat_priv, is_routable); + rtr_count = batadv_mcast_forw_rtr_count(bat_priv, *is_routable); total_count = tt_count + ip_count + unsnoop_count + rtr_count; @@ -1723,6 +1723,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier + * @is_routable: stores whether the destination is routable * * Sends copies of a frame with multicast destination to any node that signaled * interest in it, that is either via the translation table or the according @@ -1735,7 +1736,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. */ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid) + unsigned short vid, int is_routable) { int ret; @@ -1751,12 +1752,16 @@ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, return ret; } + if (!is_routable) + goto skip_mc_router; + ret = batadv_mcast_forw_want_rtr(bat_priv, skb, vid); if (ret != NET_XMIT_SUCCESS) { kfree_skb(skb); return ret; } +skip_mc_router: consume_skb(skb); return ret; } diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 403929013ac4..fc1ffd22a671 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -44,7 +44,8 @@ enum batadv_forw_mode { enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **mcast_single_orig); + struct batadv_orig_node **mcast_single_orig, + int *is_routable); int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -52,7 +53,7 @@ int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid); + unsigned short vid, int is_routable); void batadv_mcast_init(struct batadv_priv *bat_priv); @@ -71,7 +72,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); static inline enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **mcast_single_orig) + struct batadv_orig_node **mcast_single_orig, + int *is_routable) { return BATADV_FORW_ALL; } @@ -88,7 +90,7 @@ batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, static inline int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid) + unsigned short vid, int is_routable) { kfree_skb(skb); return NET_XMIT_DROP; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 7e052d6f759b..e59c5aa27ee0 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1351,21 +1351,21 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_TP_METER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_tp_meter_start, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_tp_meter_cancel, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_algo_dump, }, { @@ -1380,68 +1380,68 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_tt_local_dump, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_tt_global_dump, }, { .cmd = BATADV_CMD_GET_ORIGINATORS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_orig_dump, }, { .cmd = BATADV_CMD_GET_NEIGHBORS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_hardif_neigh_dump, }, { .cmd = BATADV_CMD_GET_GATEWAYS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_gw_dump, }, { .cmd = BATADV_CMD_GET_BLA_CLAIM, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_bla_claim_dump, }, { .cmd = BATADV_CMD_GET_BLA_BACKBONE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_bla_backbone_dump, }, { .cmd = BATADV_CMD_GET_DAT_CACHE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_dat_cache_dump, }, { .cmd = BATADV_CMD_GET_MCAST_FLAGS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_mcast_flags_dump, }, { .cmd = BATADV_CMD_SET_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_SET_HARDIF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_HARDIF, @@ -1457,7 +1457,7 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_SET_VLAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7f209390069e..504e3cb67bed 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -200,6 +200,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, int gw_mode; enum batadv_forw_mode forw_mode = BATADV_FORW_SINGLE; struct batadv_orig_node *mcast_single_orig = NULL; + int mcast_is_routable = 0; int network_offset = ETH_HLEN; __be16 proto; @@ -302,7 +303,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, send: if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) { forw_mode = batadv_mcast_forw_mode(bat_priv, skb, - &mcast_single_orig); + &mcast_single_orig, + &mcast_is_routable); if (forw_mode == BATADV_FORW_NONE) goto dropped; @@ -367,7 +369,8 @@ send: ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid, mcast_single_orig); } else if (forw_mode == BATADV_FORW_SOME) { - ret = batadv_mcast_forw_send(bat_priv, skb, vid); + ret = batadv_mcast_forw_send(bat_priv, skb, vid, + mcast_is_routable); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 0a2d78e811cf..83eb84e8e688 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -501,9 +501,7 @@ static int __init cmtp_init(void) { BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION); - cmtp_init_sockets(); - - return 0; + return cmtp_init_sockets(); } static void __exit cmtp_exit(void) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 811f1b9a7eca..a0f88081d56e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3410,6 +3410,7 @@ int hci_register_dev(struct hci_dev *hdev) return id; err_wqueue: + debugfs_remove_recursive(hdev->debugfs); destroy_workqueue(hdev->workqueue); destroy_workqueue(hdev->req_workqueue); err: diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 786c5b67bc0b..406c6b4eba82 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4950,8 +4950,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); - if (hcon) { + if (hcon && hcon->type == AMP_LINK) { hcon->state = BT_CLOSED; + hci_disconn_cfm(hcon, ev->reason); hci_conn_del(hcon); } @@ -5545,7 +5546,13 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) struct hci_ev_le_advertising_info *ev = ptr; s8 rssi; - if (ev->length <= HCI_MAX_AD_LENGTH) { + if (ptr > (void *)skb_tail_pointer(skb) - sizeof(*ev)) { + bt_dev_err(hdev, "Malicious advertising data."); + break; + } + + if (ev->length <= HCI_MAX_AD_LENGTH && + ev->data + ev->length <= skb_tail_pointer(skb)) { rssi = ev->data[ev->length]; process_adv_report(hdev, ev->evt_type, &ev->bdaddr, ev->bdaddr_type, NULL, 0, rssi, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 0dd8984a261d..f085b1648e66 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -33,7 +33,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); const struct nf_br_ops *nf_ops; const unsigned char *dest; - struct ethhdr *eth; u16 vid = 0; rcu_read_lock(); @@ -53,15 +52,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) BR_INPUT_SKB_CB(skb)->frag_max_size = 0; skb_reset_mac_header(skb); - eth = eth_hdr(skb); skb_pull(skb, ETH_HLEN); if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid)) goto out; if (IS_ENABLED(CONFIG_INET) && - (eth->h_proto == htons(ETH_P_ARP) || - eth->h_proto == htons(ETH_P_RARP)) && + (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) || + eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) && br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) { br_do_proxy_suppress_arp(skb, br, vid, NULL); } else if (IS_ENABLED(CONFIG_IPV6) && diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2371b833b2bc..480e4111b24c 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -743,6 +743,9 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) mtu = nf_bridge->frag_max_size; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); @@ -760,8 +763,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; - nf_bridge_update_protocol(skb); - data = this_cpu_ptr(&brnf_frag_data_storage); if (skb_vlan_tag_present(skb)) { @@ -789,8 +790,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; - nf_bridge_update_protocol(skb); - data = this_cpu_ptr(&brnf_frag_data_storage); data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 811682e06951..22f4b798d385 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -2004,6 +2004,12 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) extd = J1939_ETP; /* fall through */ case J1939_TP_CMD_BAM: /* fall through */ + if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) { + netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n", + __func__, skcb->addr.sa); + return; + } + fallthrough; case J1939_TP_CMD_RTS: /* fall through */ if (skcb->addr.type != extd) return; diff --git a/net/core/dev.c b/net/core/dev.c index ff336417c9b9..a03036456221 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3766,7 +3766,10 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - if (txq->xmit_lock_owner != cpu) { + /* Other cpus might concurrently change txq->xmit_lock_owner + * to -1 or to their cpu id, but not to our id. + */ + if (READ_ONCE(txq->xmit_lock_owner) != cpu) { if (dev_xmit_recursion()) goto recursion_alert; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index af0130039f37..e8e8389ddc96 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -277,13 +277,17 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, rcu_read_lock(); list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { + struct net_device *dev; + /* * only add a note to our monitor buffer if: * 1) this is the dev we received on * 2) its after the last_rx delta * 3) our rx_dropped count has gone up */ - if ((new_stat->dev == napi->dev) && + /* Paired with WRITE_ONCE() in dropmon_net_event() */ + dev = READ_ONCE(new_stat->dev); + if ((dev == napi->dev) && (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) && (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { trace_drop_common(NULL, NULL); @@ -1497,7 +1501,10 @@ static int dropmon_net_event(struct notifier_block *ev_block, mutex_lock(&net_dm_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { - new_stat->dev = NULL; + + /* Paired with READ_ONCE() in trace_napi_poll_hit() */ + WRITE_ONCE(new_stat->dev, NULL); + if (trace_state == TRACE_OFF) { list_del_rcu(&new_stat->list); kfree_rcu(new_stat, rcu); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 675f27ef6872..83299a85480a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -300,7 +300,7 @@ jumped: else err = ops->action(rule, fl, flags, arg); - if (!err && ops->suppress && ops->suppress(rule, arg)) + if (!err && ops->suppress && ops->suppress(rule, flags, arg)) continue; if (err != -EAGAIN) { diff --git a/net/core/filter.c b/net/core/filter.c index 5ebc973ed4c5..e16b2b5cda98 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2516,6 +2516,9 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, if (unlikely(flags)) return -EINVAL; + if (unlikely(len == 0)) + return 0; + /* First find the starting scatterlist element */ i = msg->sg.start; do { @@ -4248,12 +4251,14 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, switch (optname) { case SO_RCVBUF: val = min_t(u32, val, sysctl_rmem_max); + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_SNDBUF: val = min_t(u32, val, sysctl_wmem_max); + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(sk->sk_sndbuf, max_t(int, val * 2, SOCK_MIN_SNDBUF)); @@ -5819,24 +5824,33 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (!th->ack || th->rst || th->syn) return -ENOENT; + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + if (tcp_synq_no_recent_overflow(sk)) return -ENOENT; cookie = ntohl(th->ack_seq) - 1; - switch (sk->sk_family) { - case AF_INET: - if (unlikely(iph_len < sizeof(struct iphdr))) + /* Both struct iphdr and struct ipv6hdr have the version field at the + * same offset so we can cast to the shorter header (struct iphdr). + */ + switch (((struct iphdr *)iph)->version) { + case 4: + if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); break; #if IS_BUILTIN(CONFIG_IPV6) - case AF_INET6: + case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; + if (sk->sk_family != AF_INET6) + return -EINVAL; + ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); break; #endif /* CONFIG_IPV6 */ @@ -6703,6 +6717,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); + int field_size; if (off < 0 || off >= sizeof(struct bpf_sock)) return false; @@ -6714,7 +6729,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case offsetof(struct bpf_sock, family): case offsetof(struct bpf_sock, type): case offsetof(struct bpf_sock, protocol): - case offsetof(struct bpf_sock, dst_port): case offsetof(struct bpf_sock, src_port): case bpf_ctx_range(struct bpf_sock, src_ip4): case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): @@ -6722,6 +6736,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); + case bpf_ctx_range(struct bpf_sock, dst_port): + field_size = size == size_default ? + size_default : sizeof_field(struct bpf_sock, dst_port); + bpf_ctx_record_field_size(info, field_size); + return bpf_ctx_narrow_access_ok(off, size, field_size); + case offsetofend(struct bpf_sock, dst_port) ... + offsetof(struct bpf_sock, dst_ip4) - 1: + return false; } return size == size_default; @@ -6910,9 +6932,9 @@ void bpf_warn_invalid_xdp_action(u32 act) { const u32 act_max = XDP_REDIRECT; - WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n", - act > act_max ? "Illegal" : "Driver unsupported", - act); + pr_warn_once("%s XDP return value %u, expect packet loss!\n", + act > act_max ? "Illegal" : "Driver unsupported", + act); } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index b740a74f06f2..4dac27c98623 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1149,6 +1149,7 @@ proto_again: VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } key_vlan->vlan_tpid = saved_vlan_tpid; + key_vlan->vlan_eth_type = proto; } fdret = FLOW_DISSECT_RET_PROTO_AGAIN; diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index a5502c5aa44e..bf270b6a99b4 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -158,10 +158,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst->lwtstate->orig_output(net, sk, skb); } -static int xmit_check_hhlen(struct sk_buff *skb) +static int xmit_check_hhlen(struct sk_buff *skb, int hh_len) { - int hh_len = skb_dst(skb)->dev->hard_header_len; - if (skb_headroom(skb) < hh_len) { int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); @@ -273,6 +271,7 @@ static int bpf_xmit(struct sk_buff *skb) bpf = bpf_lwt_lwtunnel(dst->lwtstate); if (bpf->xmit.prog) { + int hh_len = dst->dev->hard_header_len; __be16 proto = skb->protocol; int ret; @@ -290,7 +289,7 @@ static int bpf_xmit(struct sk_buff *skb) /* If the header was expanded, headroom might be too * small for L2 header to come, expand as needed. */ - ret = xmit_check_hhlen(skb); + ret = xmit_check_hhlen(skb, hh_len); if (unlikely(ret)) return ret; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 2f9c0de533c7..0b64f015b3b0 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -190,6 +190,10 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla_entype) { + if (nla_len(nla_entype) < sizeof(u16)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE"); + return -EINVAL; + } encap_type = nla_get_u16(nla_entype); if (lwtunnel_valid_encap_type(encap_type, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 02e55041a881..8b6140e67e7f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -734,11 +734,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, ASSERT_RTNL(); - n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); + n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; - n->protocol = 0; write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 36347933ec3a..61f5570645e3 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -182,12 +182,23 @@ static const struct seq_operations softnet_seq_ops = { .show = softnet_seq_show, }; -static void *ptype_get_idx(loff_t pos) +static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { + struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; + struct net_device *dev; loff_t i = 0; int t; + for_each_netdev_rcu(seq_file_net(seq), dev) { + ptype_list = &dev->ptype_all; + list_for_each_entry_rcu(pt, ptype_list, list) { + if (i == pos) + return pt; + ++i; + } + } + list_for_each_entry_rcu(pt, &ptype_all, list) { if (i == pos) return pt; @@ -208,22 +219,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; + return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); + return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; + if (pt->dev) { + if (nxt != &pt->dev->ptype_all) + goto found; + + dev = pt->dev; + for_each_netdev_continue_rcu(seq_file_net(seq), dev) { + if (!list_empty(&dev->ptype_all)) { + nxt = dev->ptype_all.next; + goto found; + } + } + + nxt = ptype_all.next; + goto ptype_all; + } + if (pt->type == htons(ETH_P_ALL)) { +ptype_all: if (nxt != &ptype_all) goto found; hash = 0; @@ -252,7 +281,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && + (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 05b0c60bfba2..ad45f13a0370 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -212,7 +212,7 @@ static ssize_t speed_show(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev)) { + if (netif_running(netdev) && netif_device_present(netdev)) { struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) @@ -1661,6 +1661,9 @@ static void remove_queue_kobjects(struct net_device *dev) net_rx_queue_update_kobjects(dev, real_rx, 0); netdev_queue_update_kobjects(dev, real_tx, 0); + + dev->real_num_rx_queues = 0; + dev->real_num_tx_queues = 0; #ifdef CONFIG_SYSFS kset_unregister(dev->queues_kset); #endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index cd1d40195e46..62a972f04cef 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -168,8 +168,10 @@ static void ops_exit_list(const struct pernet_operations *ops, { struct net *net; if (ops->exit) { - list_for_each_entry(net, net_exit_list, exit_list) + list_for_each_entry(net, net_exit_list, exit_list) { ops->exit(net); + cond_resched(); + } } if (ops->exit_batch) ops->exit_batch(net_exit_list); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 55c0f32b9375..dbc9b2f53649 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3022,8 +3022,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; unsigned char name_assign_type = NET_NAME_USER; struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; - const struct rtnl_link_ops *m_ops = NULL; - struct net_device *master_dev = NULL; + const struct rtnl_link_ops *m_ops; + struct net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct nlattr *tb[IFLA_MAX + 1]; @@ -3063,6 +3063,8 @@ replay: dev = NULL; } + master_dev = NULL; + m_ops = NULL; if (dev) { master_dev = netdev_master_upper_dev_get(dev); if (master_dev) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a172d2fe4ff1..9d5be649ec8f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -768,7 +768,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) ntohs(skb->protocol), skb->pkt_type, skb->skb_iif); if (dev) - printk("%sdev name=%s feat=0x%pNF\n", + printk("%sdev name=%s feat=%pNF\n", level, dev->name, &dev->features); if (sk) printk("%ssk family=%hu type=%u proto=%u\n", @@ -2165,7 +2165,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Free pulled out fragments. */ while ((list = skb_shinfo(skb)->frag_list) != insp) { skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. */ if (clone) { @@ -5872,7 +5872,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb, /* Free pulled out fragments. */ while ((list = shinfo->frag_list) != insp) { shinfo->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. */ if (clone) { diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 17cc1edd149c..a606ad8e8be2 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -27,6 +27,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, int elem_first_coalesce) { struct page_frag *pfrag = sk_page_frag(sk); + u32 osize = msg->sg.size; int ret = 0; len -= msg->sg.size; @@ -35,13 +36,17 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, u32 orig_offset; int use, i; - if (!sk_page_frag_refill(sk, pfrag)) - return -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) { + ret = -ENOMEM; + goto msg_trim; + } orig_offset = pfrag->offset; use = min_t(int, len, pfrag->size - orig_offset); - if (!sk_wmem_schedule(sk, use)) - return -ENOMEM; + if (!sk_wmem_schedule(sk, use)) { + ret = -ENOMEM; + goto msg_trim; + } i = msg->sg.end; sk_msg_iter_var_prev(i); @@ -71,6 +76,10 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, } return ret; + +msg_trim: + sk_msg_trim(sk, msg, osize); + return ret; } EXPORT_SYMBOL_GPL(sk_msg_alloc); diff --git a/net/core/sock.c b/net/core/sock.c index 57b7a10703c3..c84f68bff7f5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2355,8 +2355,6 @@ static void sk_leave_memory_pressure(struct sock *sk) } } -/* On 32bit arches, an skb frag is limited to 2^15 */ -#define SKB_FRAG_PAGE_ORDER get_order(32768) DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); /** diff --git a/net/core/sock_map.c b/net/core/sock_map.c index df52061f99f7..2646e8f98f67 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -48,7 +48,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) if (err) goto free_stab; - stab->sks = bpf_map_area_alloc(stab->map.max_entries * + stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries * sizeof(struct sock *), stab->map.numa_node); if (stab->sks) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index e9ecbb57df45..b53d5e1d026f 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -2063,10 +2063,54 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev) } EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask); +static void dcbnl_flush_dev(struct net_device *dev) +{ + struct dcb_app_type *itr, *tmp; + + spin_lock_bh(&dcb_lock); + + list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) { + if (itr->ifindex == dev->ifindex) { + list_del(&itr->list); + kfree(itr); + } + } + + spin_unlock_bh(&dcb_lock); +} + +static int dcbnl_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_UNREGISTER: + if (!dev->dcbnl_ops) + return NOTIFY_DONE; + + dcbnl_flush_dev(dev); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block dcbnl_nb __read_mostly = { + .notifier_call = dcbnl_netdevice_event, +}; + static int __init dcbnl_init(void) { + int err; + INIT_LIST_HEAD(&dcb_app_list); + err = register_netdevice_notifier(&dcbnl_nb); + if (err) + return err; + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0); rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d19557c6d04b..7cf903f9e29a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -427,7 +427,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); if (*own_req) ireq->ireq_opt = NULL; else diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9f73ccf46c9b..7c24927e9c2c 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -538,7 +538,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, dccp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 0176786a13d4..0ede16508d5c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -673,6 +673,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) struct net_device *master; master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); if (!master) return -EPROBE_DEFER; diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index b2ba1d2556f1..e0df892b5c37 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1457,7 +1457,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1650,7 +1650,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1828,7 +1828,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -2004,7 +2004,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 70f92aaca411..a7a6b1adb698 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1344,8 +1344,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, } ops = rcu_dereference(inet_offloads[proto]); - if (likely(ops && ops->callbacks.gso_segment)) + if (likely(ops && ops->callbacks.gso_segment)) { segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; + } if (IS_ERR_OR_NULL(segs)) goto out; @@ -1974,6 +1977,10 @@ static int __init inet_init(void) ip_init(); + /* Initialise per-cpu ipv4 mibs */ + if (init_ipv4_mibs()) + panic("%s: Cannot init ipv4 mibs\n", __func__); + /* Setup TCP slab cache for open requests. */ tcp_init(); @@ -2004,12 +2011,6 @@ static int __init inet_init(void) if (init_inet_pernet_ops()) pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); - /* - * Initialise per-cpu ipv4 mibs - */ - - if (init_ipv4_mibs()) - pr_crit("%s: Cannot init ipv4 mibs\n", __func__); ipv4_proc_init(); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7b951992c372..b8fe943ae89d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1116,13 +1116,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) return err; } -static int arp_invalidate(struct net_device *dev, __be32 ip) +int arp_invalidate(struct net_device *dev, __be32 ip, bool force) { struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev); int err = -ENXIO; struct neigh_table *tbl = &arp_tbl; if (neigh) { + if ((neigh->nud_state & NUD_VALID) && !force) { + neigh_release(neigh); + return 0; + } + if (neigh->nud_state & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| @@ -1169,7 +1174,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, if (!dev) return -EINVAL; } - return arp_invalidate(dev, ip); + return arp_invalidate(dev, ip, true); } /* diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 603a3495afa6..4a8ad46397c0 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2585,7 +2585,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, free: kfree(t); out: - return -ENOBUFS; + return -ENOMEM; } static void __devinet_sysctl_unregister(struct net *net, diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 86c836fa2145..ef20f550d2f8 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -277,6 +277,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + unsigned int allocsz; /* this is non-NULL only with UDP Encapsulation */ if (x->encap) { @@ -286,6 +287,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * return err; } + allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES); + if (allocsz > ESP_SKB_FRAG_MAXSIZE) + goto cow; + if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; @@ -499,7 +504,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b875b98820ed..ef3e7a3e3a29 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1122,9 +1122,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. */ - if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) + if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim, 0); + arp_invalidate(dev, ifa->ifa_broadcast, false); + } if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { @@ -1140,6 +1142,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) prim, 0); fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 32, prim, 0); + arp_invalidate(dev, prefix | ~mask, false); } } } @@ -1588,7 +1591,7 @@ static int __net_init fib_net_init(struct net *net) int error; #ifdef CONFIG_IP_ROUTE_CLASSID - net->ipv4.fib_num_tclassid_users = 0; + atomic_set(&net->ipv4.fib_num_tclassid_users, 0); #endif error = ip_fib_net_init(net); if (error < 0) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b43a7ba5c6a4..e9a3cc9e98df 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -137,7 +137,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, return err; } -static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) +static bool fib4_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg) { struct fib_result *result = (struct fib_result *) arg->result; struct net_device *dev = NULL; @@ -258,7 +258,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (tb[FRA_FLOW]) { rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); if (rule4->tclassid) - net->ipv4.fib_num_tclassid_users++; + atomic_inc(&net->ipv4.fib_num_tclassid_users); } #endif @@ -290,7 +290,7 @@ static int fib4_rule_delete(struct fib_rule *rule) #ifdef CONFIG_IP_ROUTE_CLASSID if (((struct fib4_rule *)rule)->tclassid) - net->ipv4.fib_num_tclassid_users--; + atomic_dec(&net->ipv4.fib_num_tclassid_users); #endif net->ipv4.fib_has_custom_rules = true; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index dce85a9c20c6..f99ad4a98907 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -29,6 +29,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/netlink.h> +#include <linux/hash.h> #include <net/arp.h> #include <net/ip.h> @@ -222,7 +223,7 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh) { #ifdef CONFIG_IP_ROUTE_CLASSID if (fib_nh->nh_tclassid) - net->ipv4.fib_num_tclassid_users--; + atomic_dec(&net->ipv4.fib_num_tclassid_users); #endif fib_nh_common_release(&fib_nh->nh_common); } @@ -318,11 +319,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) static inline unsigned int fib_devindex_hashfn(unsigned int val) { - unsigned int mask = DEVINDEX_HASHSIZE - 1; + return hash_32(val, DEVINDEX_HASHBITS); +} + +static struct hlist_head * +fib_info_devhash_bucket(const struct net_device *dev) +{ + u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; - return (val ^ - (val >> DEVINDEX_HASHBITS) ^ - (val >> (DEVINDEX_HASHBITS * 2))) & mask; + return &fib_info_devhash[fib_devindex_hashfn(val)]; } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, @@ -432,12 +437,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct fib_nh *nh; - unsigned int hash; spin_lock(&fib_info_lock); - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); + hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev && nh->fib_nh_gw4 == gw && @@ -624,7 +628,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; if (nh->nh_tclassid) - net->ipv4.fib_num_tclassid_users++; + atomic_inc(&net->ipv4.fib_num_tclassid_users); #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->fib_nh_weight = nh_weight; @@ -654,6 +658,19 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, return nhs; } +static int fib_gw_from_attr(__be32 *gw, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + if (nla_len(nla) < sizeof(*gw)) { + NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_GATEWAY"); + return -EINVAL; + } + + *gw = nla_get_in_addr(nla); + + return 0; +} + /* only called when fib_nh is integrated into fib_info */ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, @@ -696,7 +713,11 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, return -EINVAL; } if (nla) { - fib_cfg.fc_gw4 = nla_get_in_addr(nla); + ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla, + extack); + if (ret) + goto errout; + if (fib_cfg.fc_gw4) fib_cfg.fc_gw_family = AF_INET; } else if (nlav) { @@ -706,10 +727,18 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, } nla = nla_find(attrs, attrlen, RTA_FLOW); - if (nla) + if (nla) { + if (nla_len(nla) < sizeof(u32)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); + return -EINVAL; + } fib_cfg.fc_flow = nla_get_u32(nla); + } fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + /* RTA_ENCAP_TYPE length checked in + * lwtunnel_valid_encap_type_attr + */ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) fib_cfg.fc_encap_type = nla_get_u16(nla); @@ -847,8 +876,13 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, } if (cfg->fc_oif || cfg->fc_gw_family) { - struct fib_nh *nh = fib_info_nh(fi, 0); + struct fib_nh *nh; + /* cannot match on nexthop object attributes */ + if (fi->nh) + return 1; + + nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, nh, cfg, extack)) @@ -894,6 +928,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); + int err; nla = nla_find(attrs, attrlen, RTA_GATEWAY); nlav = nla_find(attrs, attrlen, RTA_VIA); @@ -904,12 +939,17 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, } if (nla) { + __be32 gw; + + err = fib_gw_from_attr(&gw, nla, extack); + if (err) + return err; + if (nh->fib_nh_gw_family != AF_INET || - nla_get_in_addr(nla) != nh->fib_nh_gw4) + gw != nh->fib_nh_gw4) return 1; } else if (nlav) { struct fib_config cfg2; - int err; err = fib_gw_from_via(&cfg2, nlav, extack); if (err) @@ -932,8 +972,14 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); - if (nla && nla_get_u32(nla) != nh->nh_tclassid) - return 1; + if (nla) { + if (nla_len(nla) < sizeof(u32)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); + return -EINVAL; + } + if (nla_get_u32(nla) != nh->nh_tclassid) + return 1; + } #endif } @@ -1557,12 +1603,10 @@ link_it: } else { change_nexthops(fi) { struct hlist_head *head; - unsigned int hash; if (!nexthop_nh->fib_nh_dev) continue; - hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } @@ -1903,8 +1947,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { @@ -1923,12 +1966,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { - int ret = 0; - int scope = RT_SCOPE_NOWHERE; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_info *prev_fi = NULL; - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; + int ret = 0; if (force) scope = -1; @@ -2073,7 +2115,6 @@ out: int fib_sync_up(struct net_device *dev, unsigned char nh_flags) { struct fib_info *prev_fi; - unsigned int hash; struct hlist_head *head; struct fib_nh *nh; int ret; @@ -2089,8 +2130,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) } prev_fi = NULL; - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); ret = 0; hlist_for_each_entry(nh, head, nh_hash) { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index b1ecc9195517..cac2fdd08df0 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2403,9 +2403,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct newpsl->sl_addr[i] = psl->sl_addr[i]; /* decrease mem now to avoid the memleak warning */ atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); } rcu_assign_pointer(pmc->sflist, newpsl); + if (psl) + kfree_rcu(psl, rcu); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ @@ -2503,11 +2504,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) psl->sl_count, psl->sl_addr, 0); /* decrease mem now to avoid the memleak warning */ atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); - kfree_rcu(psl, rcu); - } else + } else { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); + } rcu_assign_pointer(pmc->sflist, newpsl); + if (psl) + kfree_rcu(psl, rcu); pmc->sfmode = msf->imsf_fmode; err = 0; done: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 85a88425edc4..6cbf0db57ad0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -791,7 +791,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); - inet_ehash_insert(req_to_sk(req), NULL); + inet_ehash_insert(req_to_sk(req), NULL, NULL); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4f71aca15666..f8f79672cc5f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -200,6 +200,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; + r->idiag_expires = 0; if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) goto errout; @@ -240,20 +241,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = - jiffies_to_msecs(icsk->icsk_timeout - jiffies); + jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->idiag_timer = 4; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = - jiffies_to_msecs(icsk->icsk_timeout - jiffies); + jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies); } else if (timer_pending(&sk->sk_timer)) { r->idiag_timer = 2; r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = - jiffies_to_msecs(sk->sk_timer.expires - jiffies); - } else { - r->idiag_timer = 0; - r->idiag_expires = 0; + jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); } if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { @@ -338,16 +336,13 @@ static int inet_twsk_diag_fill(struct sock *sk, r = nlmsg_data(nlh); BUG_ON(tw->tw_state != TCP_TIME_WAIT); - tmo = tw->tw_timer.expires - jiffies; - if (tmo < 0) - tmo = 0; - inet_diag_msg_common_fill(r, sk); r->idiag_retrans = 0; r->idiag_state = tw->tw_substate; r->idiag_timer = 3; - r->idiag_expires = jiffies_to_msecs(tmo); + tmo = tw->tw_timer.expires - jiffies; + r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; @@ -381,7 +376,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, offsetof(struct sock, sk_cookie)); tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; - r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0; + r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 10d31733297d..e0e8a65d561e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -204,9 +204,9 @@ void inet_frag_kill(struct inet_frag_queue *fq) /* The RCU read lock provides a memory barrier * guaranteeing that if fqdir->dead is false then * the hash table destruction will not start until - * after we unlock. Paired with inet_frags_exit_net(). + * after we unlock. Paired with fqdir_pre_exit(). */ - if (!fqdir->dead) { + if (!READ_ONCE(fqdir->dead)) { rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, fqdir->f->rhash_params); refcount_dec(&fq->refcnt); @@ -321,9 +321,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { + /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ + long high_thresh = READ_ONCE(fqdir->high_thresh); struct inet_frag_queue *fq = NULL, *prev; - if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh) + if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) return NULL; rcu_read_lock(); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 72fdf1fcbcaa..cbbeb0eea0c3 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -20,6 +20,9 @@ #include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/inet6_hashtables.h> +#endif #include <net/secure_seq.h> #include <net/ip.h> #include <net/tcp.h> @@ -470,10 +473,52 @@ static u32 inet_sk_port_offset(const struct sock *sk) inet->inet_dport); } -/* insert a socket into ehash, and eventually remove another one - * (The another one can be a SYN_RECV or TIMEWAIT +/* Searches for an exsiting socket in the ehash bucket list. + * Returns true if found, false otherwise. */ -bool inet_ehash_insert(struct sock *sk, struct sock *osk) +static bool inet_ehash_lookup_by_sk(struct sock *sk, + struct hlist_nulls_head *list) +{ + const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num); + const int sdif = sk->sk_bound_dev_if; + const int dif = sk->sk_bound_dev_if; + const struct hlist_nulls_node *node; + struct net *net = sock_net(sk); + struct sock *esk; + + INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr); + + sk_nulls_for_each_rcu(esk, node, list) { + if (esk->sk_hash != sk->sk_hash) + continue; + if (sk->sk_family == AF_INET) { + if (unlikely(INET_MATCH(esk, net, acookie, + sk->sk_daddr, + sk->sk_rcv_saddr, + ports, dif, sdif))) { + return true; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (sk->sk_family == AF_INET6) { + if (unlikely(INET6_MATCH(esk, net, + &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, + ports, dif, sdif))) { + return true; + } + } +#endif + } + return false; +} + +/* Insert a socket into ehash, and eventually remove another one + * (The another one can be a SYN_RECV or TIMEWAIT) + * If an existing socket already exists, socket sk is not inserted, + * and sets found_dup_sk parameter to true. + */ +bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; @@ -492,16 +537,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk) if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); ret = sk_nulls_del_node_init_rcu(osk); + } else if (found_dup_sk) { + *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); + if (*found_dup_sk) + ret = false; } + if (ret) __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); + return ret; } -bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) { - bool ok = inet_ehash_insert(sk, osk); + bool ok = inet_ehash_insert(sk, osk, found_dup_sk); if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -545,7 +597,7 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { - inet_ehash_nolisten(sk, osk); + inet_ehash_nolisten(sk, osk, NULL); return 0; } WARN_ON(!sk_unhashed(sk)); @@ -641,7 +693,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - inet_ehash_nolisten(sk, NULL); + inet_ehash_nolisten(sk, NULL, NULL); spin_unlock_bh(&head->lock); return 0; } @@ -720,7 +772,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - inet_ehash_nolisten(sk, (struct sock *)tw); + inet_ehash_nolisten(sk, (struct sock *)tw, NULL); } if (tw) inet_twsk_bind_unhash(tw, hinfo); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cfeb8890f94e..fad803d2d711 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t) rcu_read_lock(); - if (qp->q.fqdir->dead) + /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(qp->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&qp->q.lock); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c4989e5903e4..5b38d03f6d79 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -432,14 +432,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); - - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; + __be16 flags = tunnel->parms.o_flags; /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, - tunnel->parms.o_flags, proto, tunnel->parms.o_key, - htonl(tunnel->o_seqno)); + flags, proto, tunnel->parms.o_key, + (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } @@ -577,8 +575,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) key = &info->key; ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, - tunnel_id_to_key32(key->tun_id), key->tos, 0, - skb->mark, skb_get_hash(skb)); + tunnel_id_to_key32(key->tun_id), + key->tos & ~INET_ECN_MASK, 0, skb->mark, + skb_get_hash(skb)); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0ec529d77a56..418e93987800 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -161,12 +161,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - if (ip_dont_fragment(sk, &rt->dst)) { + /* Do not bother generating IPID for small packets (eg SYNACK) */ + if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) { iph->frag_off = htons(IP_DF); iph->id = 0; } else { iph->frag_off = 0; - __ip_select_ident(net, iph, 1); + /* TCP packets here are SYNACK with fat IPv4/TCP options. + * Avoid using the hashed IP ident generator. + */ + if (sk->sk_protocol == IPPROTO_TCP) + iph->id = (__force __be16)prandom_u32(); + else + __ip_select_ident(net, iph, 1); } if (opt && opt->opt.optlen) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d71935618871..2da689608036 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -259,7 +259,9 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; err2: + rtnl_lock(); ipmr_free_table(mrt); + rtnl_unlock(); err1: fib_rules_unregister(ops); return err; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 6bdb1ab8af61..63ebb87d8533 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -505,8 +505,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (IS_ERR(config)) return PTR_ERR(config); } - } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) + } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) { + clusterip_config_entry_put(config); + clusterip_config_put(config); return -EINVAL; + } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 858bb10d8341..4d69b3de980a 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -839,15 +839,36 @@ static void remove_nexthop(struct net *net, struct nexthop *nh, /* if any FIB entries reference this nexthop, any dst entries * need to be regenerated */ -static void nh_rt_cache_flush(struct net *net, struct nexthop *nh) +static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, + struct nexthop *replaced_nh) { struct fib6_info *f6i; + struct nh_group *nhg; + int i; if (!list_empty(&nh->fi_list)) rt_cache_flush(net); list_for_each_entry(f6i, &nh->f6i_list, nh_list) ipv6_stub->fib6_update_sernum(net, f6i); + + /* if an IPv6 group was replaced, we have to release all old + * dsts to make sure all refcounts are released + */ + if (!replaced_nh->is_group) + return; + + /* new dsts must use only the new nexthop group */ + synchronize_net(); + + nhg = rtnl_dereference(replaced_nh->nh_grp); + for (i = 0; i < nhg->num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info); + + if (nhi->family == AF_INET6) + ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh); + } } static int replace_nexthop_grp(struct net *net, struct nexthop *old, @@ -994,7 +1015,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old, err = replace_nexthop_single(net, old, new, extack); if (!err) { - nh_rt_cache_flush(net, old); + nh_rt_cache_flush(net, old, new); __remove_nexthop(net, new, NULL); nexthop_put(new); @@ -1231,11 +1252,15 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, /* sets nh_dev if successful */ err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, extack); - if (err) + if (err) { + /* IPv6 is not enabled, don't call fib6_nh_release */ + if (err == -EAFNOSUPPORT) + goto out; ipv6_stub->fib6_nh_release(fib6_nh); - else + } else { nh->nh_flags = fib6_nh->fib_nh_flags; - + } +out: return err; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 1c3d5d3702a1..33e6392e8b82 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -172,16 +172,22 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; - int dif = skb->dev->ifindex; + int dif, sdif; if (skb->protocol == htons(ETH_P_IP)) { + dif = inet_iif(skb); + sdif = inet_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", (int)ident, &ip_hdr(skb)->daddr, dif); #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6)) { + dif = inet6_iif(skb); + sdif = inet6_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", (int)ident, &ipv6_hdr(skb)->daddr, dif); #endif + } else { + return NULL; } read_lock_bh(&ping_table.lock); @@ -220,7 +226,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) continue; } - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != sdif) continue; sock_hold(sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3183413ebc6c..ddc24e57dc55 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -720,6 +720,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret = -EINVAL; int chk_addr_ret; + lock_sock(sk); if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; @@ -739,7 +740,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; -out: return ret; +out: + release_sock(sk); + return ret; } /* diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 2b45d1455592..6811174ad518 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -332,6 +332,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); + treq->af_specific = &tcp_request_sock_ipv4_ops; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; treq->ts_off = 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9f53d25e047e..4815cf72569e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1652,11 +1652,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if (!copied) copied = used; break; - } else if (used <= len) { - seq += used; - copied += used; - offset += used; } + if (WARN_ON_ONCE(used > len)) + used = len; + seq += used; + copied += used; + offset += used; + /* If recv_actor drops the lock (e.g. TCP splice * receive) the skb pointer might be invalid when * getting here: tcp_collapse might have deleted it diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 7df7ec74807a..bcc13368c836 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -296,10 +296,9 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, struct sk_psock *psock = sk_psock_get(sk); int ret; - if (unlikely(!psock)) { - sk_msg_free(sk, msg); - return 0; - } + if (unlikely(!psock)) + return -EPIPE; + ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) : tcp_bpf_push_locked(sk, msg, bytes, flags, false); sk_psock_put(sk, psock); @@ -367,7 +366,7 @@ more_data: cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, tosend); + sk_msg_return(sk, msg, msg->sg.size); release_sock(sk); ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); @@ -407,8 +406,11 @@ more_data: } if (msg && msg->sg.data[msg->sg.start].page_link && - msg->sg.data[msg->sg.start].length) + msg->sg.data[msg->sg.start].length) { + if (eval == __SK_REDIRECT) + sk_mem_charge(sk, msg->sg.size); goto more_data; + } } return ret; } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ee6c38a73325..44be7a5a1391 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -341,8 +341,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; if (tcp_in_slow_start(tp)) { - if (hystart && after(ack, ca->end_seq)) - bictcp_hystart_reset(sk); acked = tcp_slow_start(tp, acked); if (!acked) return; @@ -384,6 +382,9 @@ static void hystart_update(struct sock *sk, u32 delay) if (ca->found & hystart_detect) return; + if (after(tp->snd_una, ca->end_seq)) + bictcp_hystart_reset(sk); + if (hystart_detect & HYSTART_ACK_TRAIN) { u32 now = bictcp_clock(); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c0fcfa296468..b0e6fc2c5e10 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3717,7 +3717,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_process_tlp_ack(sk, ack, flag); if (tcp_ack_is_dubious(sk, flag)) { - if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) { + if (!(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DSACKING_ACK))) { num_dupack = 1; /* Consider if pure acks were aggregated in tcp_add_backlog() */ if (!(flag & FLAG_DATA)) @@ -5230,7 +5231,17 @@ static void tcp_new_space(struct sock *sk) sk->sk_write_space(sk); } -static void tcp_check_space(struct sock *sk) +/* Caller made space either from: + * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced) + * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt) + * + * We might be able to generate EPOLLOUT to the application if: + * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2 + * 2) notsent amount (tp->write_seq - tp->snd_nxt) became + * small enough that tcp_stream_memory_free() decides it + * is time to generate EPOLLOUT. + */ +void tcp_check_space(struct sock *sk) { if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2ce85e52aea7..72fe93ace7d7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1383,7 +1383,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { +const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .mss_clamp = TCP_MSS_DEFAULT, #ifdef CONFIG_TCP_MD5SIG .req_md5_lookup = tcp_v4_md5_lookup, @@ -1426,6 +1426,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, bool *own_req) { struct inet_request_sock *ireq; + bool found_dup_sk = false; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1496,12 +1497,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (likely(*own_req)) { tcp_move_syn(newtp, req); ireq->ireq_opt = NULL; } else { newinet->inet_opt = NULL; + + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case only + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } } return newsk; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 194743bd3fc1..9b038cb0a43d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -538,7 +538,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ - if (newtp->af_specific->md5_lookup(sk, newsk)) + if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req))) newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 638d7b49ad71..67493ec6318a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -81,6 +81,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT, tcp_skb_pcount(skb)); + tcp_check_space(sk); } /* SND.NXT, if window was not shrunk or the amount of shrunk was less than one @@ -3492,6 +3493,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) */ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; int space, err = 0; @@ -3506,8 +3508,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) * private TCP options. The cost is reduced data space in SYN :( */ tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp); + /* Sync mss_cache after updating the mss_clamp */ + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - + space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; space = min_t(size_t, space, fo->size); diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 0de693565963..6ab197928abb 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -73,26 +73,31 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) * * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is * called multiple times. We favor the information from the most recently - * sent skb, i.e., the skb with the highest prior_delivered count. + * sent skb, i.e., the skb with the most recently sent time and the highest + * sequence. */ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + u64 tx_tstamp; if (!scb->tx.delivered_mstamp) return; + tx_tstamp = tcp_skb_timestamp_us(skb); if (!rs->prior_delivered || - after(scb->tx.delivered, rs->prior_delivered)) { + tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, + scb->end_seq, rs->last_end_seq)) { rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; + rs->last_end_seq = scb->end_seq; /* Record send time of most recently ACKed packet: */ - tp->first_tx_mstamp = tcp_skb_timestamp_us(skb); + tp->first_tx_mstamp = tx_tstamp; /* Find the duration of the "send phase" of this window: */ rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, scb->tx.first_tx_mstamp); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fdbd56ee1300..83fd4fa40d5e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -544,6 +544,12 @@ void udp_encap_enable(void) } EXPORT_SYMBOL(udp_encap_enable); +void udp_encap_disable(void) +{ + static_branch_dec(&udp_encap_needed_key); +} +EXPORT_SYMBOL(udp_encap_disable); + /* Handler for tunnels with arbitrary destination ports: no socket lookup, go * through error handlers in encapsulations looking for a match. */ @@ -845,7 +851,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } @@ -2943,7 +2949,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_puts(seq, " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d1f29a3eb70b..92b32d131e1c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -542,7 +542,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, - devconf->mc_forwarding) < 0) + atomic_read(&devconf->mc_forwarding)) < 0) goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && @@ -3715,6 +3715,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct inet6_dev *idev; struct inet6_ifaddr *ifa, *tmp; bool keep_addr = false; + bool was_ready; int state, i; ASSERT_RTNL(); @@ -3780,7 +3781,10 @@ restart: addrconf_del_rs_timer(idev); - /* Step 2: clear flags for stateless addrconf */ + /* Step 2: clear flags for stateless addrconf, repeated down + * detection + */ + was_ready = idev->if_flags & IF_READY; if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); @@ -3854,7 +3858,7 @@ restart: if (how) { ipv6_ac_destroy_dev(idev); ipv6_mc_destroy_dev(idev); - } else { + } else if (was_ready) { ipv6_mc_down(idev); } @@ -4924,6 +4928,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto error; + spin_lock_bh(&ifa->lock); if (!((ifa->flags&IFA_F_PERMANENT) && (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; @@ -4945,6 +4950,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } + spin_unlock_bh(&ifa->lock); if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || @@ -5458,7 +5464,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE - array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; + array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding); #endif array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 14ac1d911287..942da168f18f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -955,6 +955,7 @@ static const struct ipv6_stub ipv6_stub_impl = { .ip6_mtu_from_fib6 = ip6_mtu_from_fib6, .fib6_nh_init = fib6_nh_init, .fib6_nh_release = fib6_nh_release, + .fib6_nh_release_dsts = fib6_nh_release_dsts, .fib6_update_sernum = fib6_update_sernum_stub, .fib6_rt_update = fib6_rt_update, .ip6_del_rt = ip6_del_rt, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 12570a73def8..79f117e33b80 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -230,6 +230,11 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + unsigned int allocsz; + + allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES); + if (allocsz > ESP_SKB_FRAG_MAXSIZE) + goto cow; if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { @@ -440,7 +445,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index f9e8fe3ff0c5..172726939652 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -260,7 +260,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, return __fib6_rule_action(rule, flp, flags, arg); } -static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) +static bool fib6_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg) { struct fib6_result *res = arg->result; struct rt6_info *rt = res->rt6; @@ -287,8 +287,7 @@ static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg return false; suppress_route: - if (!(arg->flags & FIB_LOOKUP_NOREF)) - ip6_rt_put(rt); + ip6_rt_put_flags(rt, flags); return true; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9a6f66e0e9a2..ef55489651f8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -110,7 +110,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i) fn = rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) - fn->fn_sernum = fib6_new_sernum(net); + WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net)); } /* @@ -535,12 +535,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; - cb->args[5] = w->root->fn_sernum; + cb->args[5] = READ_ONCE(w->root->fn_sernum); } } else { - if (cb->args[5] != w->root->fn_sernum) { + int sernum = READ_ONCE(w->root->fn_sernum); + if (cb->args[5] != sernum) { /* Begin at the root if the tree changed */ - cb->args[5] = w->root->fn_sernum; + cb->args[5] = sernum; w->state = FWS_INIT; w->node = w->root; w->skip = w->count; @@ -1276,7 +1277,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, /* paired with smp_rmb() in rt6_get_cookie_safe() */ smp_wmb(); while (fn) { - fn->fn_sernum = sernum; + WRITE_ONCE(fn->fn_sernum, sernum); fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&rt->fib6_table->tb6_lock)); } @@ -2068,8 +2069,8 @@ static int fib6_clean_node(struct fib6_walker *w) }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && - w->node->fn_sernum != c->sernum) - w->node->fn_sernum = c->sernum; + READ_ONCE(w->node->fn_sernum) != c->sernum) + WRITE_ONCE(w->node->fn_sernum, c->sernum); if (!c->func) { WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); @@ -2433,7 +2434,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, iter->w.state = FWS_INIT; iter->w.node = iter->w.root; iter->w.args = iter; - iter->sernum = iter->w.root->fn_sernum; + iter->sernum = READ_ONCE(iter->w.root->fn_sernum); INIT_LIST_HEAD(&iter->w.lh); fib6_walker_link(net, &iter->w); } @@ -2461,8 +2462,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) { - if (iter->sernum != iter->w.root->fn_sernum) { - iter->sernum = iter->w.root->fn_sernum; + int sernum = READ_ONCE(iter->w.root->fn_sernum); + + if (iter->sernum != sernum) { + iter->sernum = sernum; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; WARN_ON(iter->w.skip); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 0cb8056d9800..e550db28aabb 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -730,6 +730,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; __be16 flags; + int tun_hlen; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || @@ -743,13 +744,14 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, fl6->daddr = key->u.ipv6.dst; fl6->flowlabel = key->label; fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); - tunnel->tun_hlen = gre_calc_hlen(flags); + tun_hlen = gre_calc_hlen(flags); - gre_build_header(skb, tunnel->tun_hlen, + gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) @@ -978,6 +980,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) @@ -1085,6 +1088,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) fl6->flowi6_oif = p->link; fl6->flowlabel = 0; fl6->flowi6_proto = IPPROTO_GRE; + fl6->fl6_gre_key = t->parms.o_key; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; @@ -1530,7 +1534,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = gre_rcv, .err_handler = ip6gre_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, + .flags = INET6_PROTO_FINAL, }; static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 7e5df23cbe7b..e6c4966aa956 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -485,7 +485,7 @@ int ip6_mc_input(struct sk_buff *skb) /* * IPv6 multicast router mode is now supported ;) */ - if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 7fbb44736a34..b7b4ba68f3a2 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -111,6 +111,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; } if (IS_ERR_OR_NULL(segs)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fc913f09606d..5585e3a94f3c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -192,7 +192,7 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { - IPCB(skb)->flags |= IPSKB_REROUTED; + IP6CB(skb)->flags |= IP6SKB_REROUTED; return dst_output(net, sk, skb); } #endif @@ -506,7 +506,7 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!net->ipv6.devconf_all->disable_policy && - !idev->cnf.disable_policy && + (!idev || !idev->cnf.disable_policy) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; @@ -1361,8 +1361,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (np->frag_size) mtu = np->frag_size; } - if (mtu < IPV6_MIN_MTU) - return -EINVAL; cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; @@ -1424,8 +1422,6 @@ static int __ip6_append_data(struct sock *sk, fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - - sizeof(struct frag_hdr); headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen + opt->opt_nflen : 0) + @@ -1433,6 +1429,13 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; + if (mtu <= fragheaderlen || + ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr)) + goto emsgsize; + + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); + /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit * the first fragment */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fd0d1cee2d3f..878a08c40fff 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1000,14 +1000,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Local address not yet configured!\n", - p->name); + pr_warn_ratelimited("%s xmit: Local address not yet configured!\n", + p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", - p->name); + pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n", + p->name); else ret = 1; rcu_read_unlock(); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 12ab6605d961..8b44d3b53844 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -795,6 +795,8 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index dd41313d7fa5..6248e00c2bf7 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -246,7 +246,9 @@ static int __net_init ip6mr_rules_init(struct net *net) return 0; err2: + rtnl_lock(); ip6mr_free_table(mrt); + rtnl_unlock(); err1: fib_rules_unregister(ops); return err; @@ -734,7 +736,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding--; + atomic_dec(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -902,7 +904,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, in6_dev = __in6_dev_get(dev); if (in6_dev) { - in6_dev->cnf.mc_forwarding++; + atomic_inc(&in6_dev->cnf.mc_forwarding); inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); @@ -1551,7 +1553,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) } else { rcu_assign_pointer(mrt->mroute_sk, sk); sock_set_flag(sk, SOCK_RCU_FREE); - net->ipv6.devconf_all->mc_forwarding++; + atomic_inc(&net->ipv6.devconf_all->mc_forwarding); } write_unlock_bh(&mrt_lock); @@ -1584,7 +1586,7 @@ int ip6mr_sk_done(struct sock *sk) * so the RCU grace period before sk freeing * is guaranteed by sk_destruct() */ - net->ipv6.devconf_all->mc_forwarding--; + atomic_dec(&net->ipv6.devconf_all->mc_forwarding); write_unlock_bh(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index daa876c6ae8d..619d9dffa9e4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2697,7 +2697,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (from) { fn = rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; + WRITE_ONCE(fn->fn_sernum, -1); } } rcu_read_unlock(); @@ -3585,6 +3585,25 @@ void fib6_nh_release(struct fib6_nh *fib6_nh) fib_nh_common_release(&fib6_nh->nh_common); } +void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) +{ + int cpu; + + if (!fib6_nh->rt6i_pcpu) + return; + + for_each_possible_cpu(cpu) { + struct rt6_info *pcpu_rt, **ppcpu_rt; + + ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); + pcpu_rt = xchg(ppcpu_rt, NULL); + if (pcpu_rt) { + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); + } + } +} + static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) @@ -4384,7 +4403,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) struct inet6_dev *idev; int type; - if (netif_is_l3_master(skb->dev) && + if (netif_is_l3_master(skb->dev) || dst->dev == net->loopback_dev) idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); else @@ -5073,6 +5092,19 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); } +static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + if (nla_len(nla) < sizeof(*gw)) { + NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY"); + return -EINVAL; + } + + *gw = nla_get_in6_addr(nla); + + return 0; +} + static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -5114,10 +5146,18 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - r_cfg.fc_gateway = nla_get_in6_addr(nla); + err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + extack); + if (err) + goto cleanup; + r_cfg.fc_flags |= RTF_GATEWAY; } r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + + /* RTA_ENCAP_TYPE length checked in + * lwtunnel_valid_encap_type_attr + */ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) r_cfg.fc_encap_type = nla_get_u16(nla); @@ -5269,7 +5309,13 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - nla_memcpy(&r_cfg.fc_gateway, nla, 16); + err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + extack); + if (err) { + last_err = err; + goto next_rtnh; + } + r_cfg.fc_flags |= RTF_GATEWAY; } } @@ -5277,6 +5323,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, if (err) last_err = err; +next_rtnh: rtnh = rtnh_next(rtnh, &remaining); } diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index ab7f124ff5d7..6954db1fd26e 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -143,6 +143,14 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + + /* the control block has been erased, so we have to set the + * iif once again. + * We read the receiving interface index directly from the + * skb->skb_iif as it is done in the IPv4 receiving path (i.e.: + * ip_rcv_core(...)). + */ + IP6CB(skb)->iif = skb->skb_iif; } hdr->nexthdr = NEXTHDR_ROUTING; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 7f9cae4c49e7..16e75a996b74 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1876,7 +1876,6 @@ static int __net_init sit_init_net(struct net *net) return 0; err_reg_dev: - ipip6_dev_free(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ec155844012b..37ab254f7b92 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -176,6 +176,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); + treq->af_specific = &tcp_request_sock_ipv6_ops; treq->tfo_listener = false; if (security_inet_conn_request(sk, skb, req)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3903cc0ab188..063898cae3e5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -800,7 +800,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .syn_ack_timeout = tcp_syn_ack_timeout, }; -static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { +const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG @@ -1142,6 +1142,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; + bool found_dup_sk = false; struct tcp_sock *newtp; struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG @@ -1308,7 +1309,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (*own_req) { tcp_move_syn(newtp, req); @@ -1323,6 +1325,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * skb_set_owner_r(newnp->pktoptions, newsk); } } + } else { + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case only + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } } return newsk; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 818fc9975625..040869f45682 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1132,7 +1132,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } @@ -1553,8 +1553,10 @@ void udpv6_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) + if (up->encap_enabled) { static_branch_dec(&udpv6_encap_needed_key); + udp_encap_disable(); + } } inet6_destroy_sock(sk); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6b0ed6c593e2..a6f13fab963f 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -140,6 +140,19 @@ static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buf return __xfrm6_output_state_finish(x, sk, skb); } +static int xfrm6_noneed_fragment(struct sk_buff *skb) +{ + struct frag_hdr *fh; + u8 prevhdr = ipv6_hdr(skb)->nexthdr; + + if (prevhdr != NEXTHDR_FRAGMENT) + return 0; + fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr)); + if (fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH) + return 1; + return 0; +} + static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -168,6 +181,9 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) xfrm6_local_rxpmtu(skb, mtu); kfree_skb(skb); return -EMSGSIZE; + } else if (toobig && xfrm6_noneed_fragment(skb)) { + skb->ignore_df = 1; + goto skip_frag; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); kfree_skb(skb); diff --git a/net/key/af_key.c b/net/key/af_key.c index 907d04a47459..2ac9560020f9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1703,7 +1703,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad xfrm_probe_algs(); - supp_skb = compose_sadb_supported(hdr, GFP_KERNEL); + supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<<hdr->sadb_msg_satype); @@ -2627,7 +2627,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, - kma ? &k : NULL, net, NULL); + kma ? &k : NULL, net, NULL, 0); out: return err; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index f35899d45a9a..ff4352f6d168 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -54,7 +54,7 @@ int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex) dev = dev_get_by_index_rcu(net, ifindex); while (dev && !netif_is_l3_master(dev)) - dev = netdev_master_upper_dev_get(dev); + dev = netdev_master_upper_dev_get_rcu(dev); return dev ? dev->ifindex : 0; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 0b3adf7594ff..3b1ea89a340e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -276,6 +276,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; @@ -287,14 +288,14 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) goto out; rc = -ENODEV; if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); - if (llc->dev && addr->sllc_arphrd != llc->dev->type) { - dev_put(llc->dev); - llc->dev = NULL; + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + if (dev && addr->sllc_arphrd != dev->type) { + dev_put(dev); + dev = NULL; } } else - llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); - if (!llc->dev) + dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); + if (!dev) goto out; rc = -EUSERS; llc->laddr.lsap = llc_ui_autoport(); @@ -304,6 +305,11 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sap = llc_sap_open(llc->laddr.lsap, NULL); if (!sap) goto out; + + /* Note: We do not expect errors from this point. */ + llc->dev = dev; + dev = NULL; + memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); /* assign new connection to its SAP */ @@ -311,6 +317,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; out: + dev_put(dev); return rc; } @@ -333,6 +340,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; @@ -348,25 +356,26 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); - if (llc->dev) { + dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); + if (dev) { if (is_zero_ether_addr(addr->sllc_mac)) - memcpy(addr->sllc_mac, llc->dev->dev_addr, + memcpy(addr->sllc_mac, dev->dev_addr, IFHWADDRLEN); - if (addr->sllc_arphrd != llc->dev->type || + if (addr->sllc_arphrd != dev->type || !ether_addr_equal(addr->sllc_mac, - llc->dev->dev_addr)) { + dev->dev_addr)) { rc = -EINVAL; - llc->dev = NULL; + dev = NULL; } } - } else - llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, + } else { + dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); - if (llc->dev) - dev_hold(llc->dev); + } + if (dev) + dev_hold(dev); rcu_read_unlock(); - if (!llc->dev) + if (!dev) goto out; if (!addr->sllc_sap) { rc = -EUSERS; @@ -399,6 +408,11 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) goto out_put; } } + + /* Note: We do not expect errors from this point. */ + llc->dev = dev; + dev = NULL; + llc->laddr.lsap = addr->sllc_sap; memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); @@ -409,6 +423,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: + dev_put(dev); release_sock(sk); return rc; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 4d1c335e06e5..49ec9bfb6c8e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ /** @@ -191,7 +191,8 @@ static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata, sband = ieee80211_get_sband(sdata); if (!sband) return; - he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type); + he_cap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); if (!he_cap) return; @@ -292,7 +293,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } - if (!sta->sta.ht_cap.ht_supported) { + if (!sta->sta.ht_cap.ht_supported && + sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", sta->sta.addr, tid); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b11883d26875..f30cdd7f3a73 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation */ #include <linux/ieee80211.h> @@ -106,7 +106,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, tid); } void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn) @@ -213,6 +213,8 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi; + lockdep_assert_held(&sta->ampdu_mlme.mtx); + if (!txq) return; @@ -290,7 +292,6 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) ieee80211_assign_tid_tx(sta, tid, NULL); ieee80211_agg_splice_finish(sta->sdata, tid); - ieee80211_agg_start_txq(sta, tid, false); kfree_rcu(tid_tx, rcu_head); } @@ -448,6 +449,42 @@ static void sta_addba_resp_timer_expired(struct timer_list *t) ieee80211_stop_tx_ba_session(&sta->sta, tid); } +static void ieee80211_send_addba_with_timeout(struct sta_info *sta, + struct tid_ampdu_tx *tid_tx) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sta->local; + u8 tid = tid_tx->tid; + u16 buf_size; + + /* activate the timer for the recipient's addBA response */ + mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); + ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", + sta->sta.addr, tid); + + spin_lock_bh(&sta->lock); + sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; + sta->ampdu_mlme.addba_req_num[tid]++; + spin_unlock_bh(&sta->lock); + + if (sta->sta.he_cap.has_he) { + buf_size = local->hw.max_tx_aggregation_subframes; + } else { + /* + * We really should use what the driver told us it will + * transmit as the maximum, but certain APs (e.g. the + * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012) + * will crash when we use a lower number. + */ + buf_size = IEEE80211_MAX_AMPDU_BUF_HT; + } + + /* send AddBA request */ + ieee80211_send_addba_request(sdata, sta->sta.addr, tid, + tid_tx->dialog_token, tid_tx->ssn, + buf_size, tid_tx->timeout); +} + void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -462,7 +499,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) .timeout = 0, }; int ret; - u16 buf_size; tid_tx = rcu_dereference_protected_tid_tx(sta, tid); @@ -485,6 +521,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); + tid_tx->ssn = params.ssn; if (ret) { ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", @@ -501,32 +538,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) return; } - /* activate the timer for the recipient's addBA response */ - mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); - ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", - sta->sta.addr, tid); - - spin_lock_bh(&sta->lock); - sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; - sta->ampdu_mlme.addba_req_num[tid]++; - spin_unlock_bh(&sta->lock); - - if (sta->sta.he_cap.has_he) { - buf_size = local->hw.max_tx_aggregation_subframes; - } else { - /* - * We really should use what the driver told us it will - * transmit as the maximum, but certain APs (e.g. the - * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012) - * will crash when we use a lower number. - */ - buf_size = IEEE80211_MAX_AMPDU_BUF_HT; - } - - /* send AddBA request */ - ieee80211_send_addba_request(sdata, sta->sta.addr, tid, - tid_tx->dialog_token, params.ssn, - buf_size, tid_tx->timeout); + ieee80211_send_addba_with_timeout(sta, tid_tx); } /* @@ -571,7 +583,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, "Requested to start BA session on reserved tid=%d", tid)) return -EINVAL; - if (!pubsta->ht_cap.ht_supported) + if (!pubsta->ht_cap.ht_supported && + sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) return -EINVAL; if (WARN_ON_ONCE(!local->ops->ampdu_action)) @@ -602,6 +615,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; } + if (test_sta_flag(sta, WLAN_STA_MFP) && + !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { + ht_dbg(sdata, + "MFP STA not authorized - deny BA session request %pM tid %d\n", + sta->sta.addr, tid); + return -EINVAL; + } + /* * 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a * member of an IBSS, and has no other existing Block Ack agreement @@ -860,6 +881,7 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, { struct ieee80211_sub_if_data *sdata = sta->sdata; bool send_delba = false; + bool start_txq = false; ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", sta->sta.addr, tid); @@ -877,10 +899,14 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, send_delba = true; ieee80211_remove_tid_tx(sta, tid); + start_txq = true; unlock_sta: spin_unlock_bh(&sta->lock); + if (start_txq) + ieee80211_agg_start_txq(sta, tid, false); + if (send_delba) ieee80211_send_delba(sdata, sta->sta.addr, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1b50bbf030ed..16f37fd0ac0e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1949,13 +1949,11 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, const struct mesh_setup *setup) { u8 *new_ie; - const u8 *old_ie; struct ieee80211_sub_if_data *sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); /* allocate information elements */ new_ie = NULL; - old_ie = ifmsh->ie; if (setup->ie_len) { new_ie = kmemdup(setup->ie, setup->ie_len, @@ -1965,7 +1963,6 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, } ifmsh->ie_len = setup->ie_len; ifmsh->ie = new_ie; - kfree(old_ie); /* now copy the rest of the setup parameters */ ifmsh->mesh_id_len = setup->mesh_id_len; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 2c9b3eb8b652..f4c7e0af896b 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1202,8 +1202,11 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); - if (local->in_reconfig) + /* In reconfig don't transmit now, but mark for waking later */ + if (local->in_reconfig) { + set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags); return; + } if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ccaf2389ccc1..ad00f31e2002 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2418,11 +2418,18 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, u16 tx_time) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u16 tid = ieee80211_get_tid(hdr); - int ac = ieee80211_ac_from_tid(tid); - struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; + u16 tid; + int ac; + struct ieee80211_sta_tx_tspec *tx_tspec; unsigned long now = jiffies; + if (!ieee80211_is_data_qos(hdr->frame_control)) + return; + + tid = ieee80211_get_tid(hdr); + ac = ieee80211_ac_from_tid(tid); + tx_tspec = &ifmgd->tx_tspec[ac]; + if (likely(!tx_tspec->admitted_time)) return; @@ -4946,7 +4953,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, */ if (new_sta) { u32 rates = 0, basic_rates = 0; - bool have_higher_than_11mbit; + bool have_higher_than_11mbit = false; int min_rate = INT_MAX, min_rate_index = -1; const struct cfg80211_bss_ies *ies; int shift = ieee80211_vif_get_shift(&sdata->vif); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c7e6bf7c22c7..ab91683d9459 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1918,7 +1918,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) int keyid = rx->sta->ptk_idx; sta_ptk = rcu_dereference(rx->sta->ptk[keyid]); - if (ieee80211_has_protected(fc)) { + if (ieee80211_has_protected(fc) && + !(status->flag & RX_FLAG_IV_STRIPPED)) { cs = rx->sta->cipher_scheme; keyid = ieee80211_get_keyid(rx->skb, cs); @@ -2851,13 +2852,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) ether_addr_equal(sdata->vif.addr, hdr->addr3)) return RX_CONTINUE; - ac = ieee80211_select_queue_80211(sdata, skb, hdr); + ac = ieee802_1d_to_ac[skb->priority]; q = sdata->vif.hw_queue[ac]; if (ieee80211_queue_stopped(&local->hw, q)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); return RX_DROP_MONITOR; } - skb_set_queue_mapping(skb, q); + skb_set_queue_mapping(skb, ac); if (!--mesh_hdr->ttl) { if (!is_multicast_ether_addr(hdr->addr1)) @@ -4692,7 +4693,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, goto drop; break; case RX_ENC_VHT: - if (WARN_ONCE(status->rate_idx > 9 || + if (WARN_ONCE(status->rate_idx > 11 || !status->nss || status->nss > 8, "Rate marked as a VHT rate but data is invalid: MCS: %d, NSS: %d\n", diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2eb73be9b986..be0df78d4a79 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -180,6 +180,7 @@ struct tid_ampdu_tx { u8 stop_initiator; bool tx_stop; u16 buf_size; + u16 ssn; u16 failed_bar_ssn; bool bar_pending; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index decd46b38393..c1c117fdf318 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1227,6 +1227,8 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->max_idle_period_ie = (void *)pos; break; case WLAN_EID_EXTENSION: + if (!elen) + break; if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA && elen >= (sizeof(*elems->mu_edca_param_set) + 1)) { elems->mu_edca_param_set = (void *)&pos[1]; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 4701edffb1f7..d5e3656fc67c 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1491,22 +1491,52 @@ static void mpls_dev_destroy_rcu(struct rcu_head *head) kfree(mdev); } -static void mpls_ifdown(struct net_device *dev, int event) +static int mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - u8 alive, deleted; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); + bool nh_del = false; + u8 alive = 0; if (!rt) continue; - alive = 0; - deleted = 0; + if (event == NETDEV_UNREGISTER) { + u8 deleted = 0; + + for_nexthops(rt) { + struct net_device *nh_dev = + rtnl_dereference(nh->nh_dev); + + if (!nh_dev || nh_dev == dev) + deleted++; + if (nh_dev == dev) + nh_del = true; + } endfor_nexthops(rt); + + /* if there are no more nexthops, delete the route */ + if (deleted == rt->rt_nhn) { + mpls_route_update(net, index, NULL, NULL); + continue; + } + + if (nh_del) { + size_t size = sizeof(*rt) + rt->rt_nhn * + rt->rt_nh_size; + struct mpls_route *orig = rt; + + rt = kmalloc(size, GFP_KERNEL); + if (!rt) + return -ENOMEM; + memcpy(rt, orig, size); + } + } + change_nexthops(rt) { unsigned int nh_flags = nh->nh_flags; @@ -1530,16 +1560,15 @@ static void mpls_ifdown(struct net_device *dev, int event) next: if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))) alive++; - if (!rtnl_dereference(nh->nh_dev)) - deleted++; } endfor_nexthops(rt); WRITE_ONCE(rt->rt_nhn_alive, alive); - /* if there are no more nexthops, delete the route */ - if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn) - mpls_route_update(net, index, NULL, NULL); + if (nh_del) + mpls_route_update(net, index, rt, NULL); } + + return 0; } static void mpls_ifup(struct net_device *dev, unsigned int flags) @@ -1607,8 +1636,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, return NOTIFY_OK; switch (event) { + int err; + case NETDEV_DOWN: - mpls_ifdown(dev, event); + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); break; case NETDEV_UP: flags = dev_get_flags(dev); @@ -1619,13 +1652,18 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGE: flags = dev_get_flags(dev); - if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) { mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); - else - mpls_ifdown(dev, event); + } else { + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); + } break; case NETDEV_UNREGISTER: - mpls_ifdown(dev, event); + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); mdev = mpls_dev_get(dev); if (mdev) { mpls_dev_sysctl_unregister(dev, mdev); @@ -1636,8 +1674,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGENAME: mdev = mpls_dev_get(dev); if (mdev) { - int err; - mpls_dev_sysctl_unregister(dev, mdev); err = mpls_dev_sysctl_register(dev, mdev); if (err) diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 0187e65176c0..114ef47db76d 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -18,6 +18,8 @@ #include "internal.h" #include "ncsi-pkt.h" +static const int padding_bytes = 26; + u32 ncsi_calculate_checksum(unsigned char *data, int len) { u32 checksum = 0; @@ -213,12 +215,17 @@ static int ncsi_cmd_handler_oem(struct sk_buff *skb, { struct ncsi_cmd_oem_pkt *cmd; unsigned int len; + int payload; + /* NC-SI spec DSP_0222_1.2.0, section 8.2.2.2 + * requires payload to be padded with 0 to + * 32-bit boundary before the checksum field. + * Ensure the padding bytes are accounted for in + * skb allocation + */ + payload = ALIGN(nca->payload, 4); len = sizeof(struct ncsi_cmd_pkt_hdr) + 4; - if (nca->payload < 26) - len += 26; - else - len += nca->payload; + len += max(payload, padding_bytes); cmd = skb_put_zero(skb, len); memcpy(&cmd->mfr_id, nca->data, nca->payload); @@ -272,6 +279,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) struct net_device *dev = nd->dev; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + int payload; int len = hlen + tlen; struct sk_buff *skb; struct ncsi_request *nr; @@ -281,14 +289,14 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) return NULL; /* NCSI command packet has 16-bytes header, payload, 4 bytes checksum. + * Payload needs padding so that the checksum field following payload is + * aligned to 32-bit boundary. * The packet needs padding if its payload is less than 26 bytes to * meet 64 bytes minimal ethernet frame length. */ len += sizeof(struct ncsi_cmd_pkt_hdr) + 4; - if (nca->payload < 26) - len += 26; - else - len += nca->payload; + payload = ALIGN(nca->payload, 4); + len += max(payload, padding_bytes); /* Allocate skb */ skb = alloc_skb(len, GFP_ATOMIC); diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index a33ea45dec05..27700887c321 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb, pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); if (!pnest) return -ENOMEM; - nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + if (rc) { + nla_nest_cancel(skb, pnest); + return rc; + } if ((0x1 << np->id) == ndp->package_whitelist) nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 5d5bdf450091..451b2df998ea 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -336,14 +336,15 @@ static int __nf_register_net_hook(struct net *net, int pf, p = nf_entry_dereference(*pp); new_hooks = nf_hook_entries_grow(p, reg); - if (!IS_ERR(new_hooks)) + if (!IS_ERR(new_hooks)) { + hooks_validate(new_hooks); rcu_assign_pointer(*pp, new_hooks); + } mutex_unlock(&nf_hook_mutex); if (IS_ERR(new_hooks)) return PTR_ERR(new_hooks); - hooks_validate(new_hooks); #ifdef CONFIG_NETFILTER_INGRESS if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) net_inc_ingress_queue(); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index d1524ca4b90e..a189079a6ea5 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1421,7 +1421,7 @@ int __init ip_vs_conn_init(void) pr_info("Connection hash table configured " "(size=%d, memory=%ldKbytes)\n", ip_vs_conn_tab_size, - (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024); + (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024); IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n", sizeof(struct ip_vs_conn)); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 89aa1fc334b1..ccd6af144074 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1982,7 +1982,6 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, pkts; - int conn_reuse_mode; struct sock *sk; /* Already marked as IPVS request or reply? */ @@ -2059,15 +2058,16 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, ipvs, af, skb, &iph); - conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); - if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) { + if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) { + int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); bool old_ct = false, resched = false; if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && unlikely(!atomic_read(&cp->dest->weight))) { resched = true; old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); - } else if (is_new_conn_expected(cp, conn_reuse_mode)) { + } else if (conn_reuse_mode && + is_new_conn_expected(cp, conn_reuse_mode)) { old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); if (!atomic_read(&cp->n_control)) { resched = true; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 4bcc36e4b2ef..d9b6f2001d00 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1709,15 +1709,17 @@ repeat: pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put(&ct->ct_general); skb->_nfct = 0; - NF_CT_STAT_INC_ATOMIC(state->net, invalid); - if (ret == -NF_DROP) - NF_CT_STAT_INC_ATOMIC(state->net, drop); /* Special case: TCP tracker reports an attempt to reopen a * closed/aborted connection. We have to go back and create a * fresh conntrack. */ if (ret == -NF_REPEAT) goto repeat; + + NF_CT_STAT_INC_ATOMIC(state->net, invalid); + if (ret == -NF_DROP) + NF_CT_STAT_INC_ATOMIC(state->net, drop); + ret = -ret; goto out; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 810cca24b399..7626f3e1c70a 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -489,6 +489,15 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; + + /* don't renew timeout on init retransmit so + * port reuse by client or NAT middlebox cannot + * keep entry alive indefinitely (incl. nat info). + */ + if (new_state == SCTP_CONNTRACK_CLOSED && + old_state == SCTP_CONNTRACK_CLOSED && + nf_ct_is_confirmed(ct)) + ignore = true; } ct->proto.sctp.state = new_state; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 848b137151c2..b8cc3339a249 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -354,8 +354,8 @@ static void tcp_options(const struct sk_buff *skb, length, buff); BUG_ON(ptr == NULL); - state->td_scale = - state->flags = 0; + state->td_scale = 0; + state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL; while (length > 0) { int opcode=*ptr++; @@ -840,6 +840,16 @@ static bool nf_conntrack_tcp_established(const struct nf_conn *ct) test_bit(IPS_ASSURED_BIT, &ct->status); } +static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state) +{ + state->td_end = 0; + state->td_maxend = 0; + state->td_maxwin = 0; + state->td_maxack = 0; + state->td_scale = 0; + state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL; +} + /* Returns verdict for packet, or -1 for invalid. */ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -946,8 +956,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = ct->proto.tcp.last_flags; - memset(&ct->proto.tcp.seen[dir], 0, - sizeof(struct ip_ct_tcp_state)); + nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]); break; } ct->proto.tcp.last_index = index; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index f8f52ff99cfb..643dbfe7c581 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -64,6 +64,15 @@ static void nf_queue_entry_release_br_nf_refs(struct sk_buff *skb) #endif } +static void nf_queue_sock_put(struct sock *sk) +{ +#ifdef CONFIG_INET + sock_gen_put(sk); +#else + sock_put(sk); +#endif +} + void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; @@ -74,7 +83,7 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) if (state->out) dev_put(state->out); if (state->sk) - sock_put(state->sk); + nf_queue_sock_put(state->sk); nf_queue_entry_release_br_nf_refs(entry->skb); } @@ -99,18 +108,20 @@ static void nf_queue_entry_get_br_nf_refs(struct sk_buff *skb) } /* Bump dev refs so they don't vanish while packet is out */ -void nf_queue_entry_get_refs(struct nf_queue_entry *entry) +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; + if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) + return false; + if (state->in) dev_hold(state->in); if (state->out) dev_hold(state->out); - if (state->sk) - sock_hold(state->sk); nf_queue_entry_get_br_nf_refs(entry->skb); + return true; } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); @@ -201,7 +212,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, .size = sizeof(*entry) + route_key_size, }; - nf_queue_entry_get_refs(entry); + if (!nf_queue_entry_get_refs(entry)) { + kfree(entry); + return -ENOTCONN; + } switch (entry->state.pf) { case AF_INET: diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 373ea0e49f12..545da270e802 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5184,12 +5184,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, { struct nft_object *newobj; struct nft_trans *trans; - int err; + int err = -ENOMEM; + + if (!try_module_get(type->owner)) + return -ENOENT; trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, sizeof(struct nft_trans_obj)); if (!trans) - return -ENOMEM; + goto err_trans; newobj = nft_obj_init(ctx, type, attr); if (IS_ERR(newobj)) { @@ -5206,6 +5209,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, err_free_trans: kfree(trans); +err_trans: + module_put(type->owner); return err; } @@ -6544,7 +6549,7 @@ static void nft_obj_commit_update(struct nft_trans *trans) if (obj->ops->update) obj->ops->update(obj, newobj); - kfree(newobj); + nft_obj_destroy(&trans->ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) @@ -7109,7 +7114,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - kfree(nft_trans_obj_newobj(trans)); + nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { trans->ctx.table->use--; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 96c74c4c7176..ceb0ef437e23 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -153,7 +153,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_rule *const *rules; const struct nft_rule *rule; const struct nft_expr *expr, *last; - struct nft_regs regs; + struct nft_regs regs = {}; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; bool genbit = READ_ONCE(net->nft.gencursor); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 3aa4306ca39f..2d3bc22c855c 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -55,7 +55,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, expr = nft_expr_first(rule); while (nft_expr_more(rule, expr)) { - if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION) + if (expr->ops->offload_action && + expr->ops->offload_action(expr)) num_actions++; expr = nft_expr_next(expr); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 7ca2ca4bba05..b36af4741ad3 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -557,7 +557,8 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; if (indev && skb->dev && - skb->mac_header != skb->network_header) { + skb_mac_header_was_set(skb) && + skb_mac_header_len(skb) != 0) { struct nfulnl_msg_packet_hw phw; int len; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index a8cb562da3fe..7d3ab08a5a2d 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -562,7 +562,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nla_put_failure; if (indev && entskb->dev && - skb_mac_header_was_set(entskb)) { + skb_mac_header_was_set(entskb) && + skb_mac_header_len(entskb) != 0) { struct nfqnl_msg_packet_hw phw; int len; @@ -711,9 +712,15 @@ static struct nf_queue_entry * nf_queue_entry_dup(struct nf_queue_entry *e) { struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); - if (entry) - nf_queue_entry_get_refs(entry); - return entry; + + if (!entry) + return NULL; + + if (nf_queue_entry_get_refs(entry)) + return entry; + + kfree(entry); + return NULL; } #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index c2e78c160fd7..6007089e1c2f 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -67,6 +67,11 @@ static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx, return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif); } +static bool nft_dup_netdev_offload_action(const struct nft_expr *expr) +{ + return true; +} + static struct nft_expr_type nft_dup_netdev_type; static const struct nft_expr_ops nft_dup_netdev_ops = { .type = &nft_dup_netdev_type, @@ -75,6 +80,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = { .init = nft_dup_netdev_init, .dump = nft_dup_netdev_dump, .offload = nft_dup_netdev_offload, + .offload_action = nft_dup_netdev_offload_action, }; static struct nft_expr_type nft_dup_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index b77985986b24..3b0dcd170551 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -77,6 +77,11 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx, return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif); } +static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr) +{ + return true; +} + struct nft_fwd_neigh { enum nft_registers sreg_dev:8; enum nft_registers sreg_addr:8; @@ -219,6 +224,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .dump = nft_fwd_netdev_dump, .validate = nft_fwd_validate, .offload = nft_fwd_netdev_offload, + .offload_action = nft_fwd_netdev_offload_action, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index c7f0ef73d939..98a8149be094 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -163,6 +163,16 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx, return 0; } +static bool nft_immediate_offload_action(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + return true; + + return false; +} + static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -173,7 +183,7 @@ static const struct nft_expr_ops nft_imm_ops = { .dump = nft_immediate_dump, .validate = nft_immediate_validate, .offload = nft_immediate_offload, - .offload_flags = NFT_OFFLOAD_F_ACTION, + .offload_action = nft_immediate_offload_action, }; struct nft_expr_type nft_imm_type __read_mostly = { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 921f8f45b17f..cf0512fc648e 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -420,6 +420,9 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, struct sk_buff *skb, unsigned int *l4csum_offset) { + if (pkt->xt.fragoff) + return -1; + switch (pkt->tprot) { case IPPROTO_TCP: *l4csum_offset = offsetof(struct tcphdr, check); diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 637ce3e8c575..4026ec38526f 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -14,6 +14,32 @@ struct nft_socket { }; }; +static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) +{ + const struct net_device *indev = nft_in(pkt); + const struct sk_buff *skb = pkt->skb; + struct sock *sk = NULL; + + if (!indev) + return NULL; + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev); + break; +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + case NFPROTO_IPV6: + sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } + + return sk; +} + static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -27,20 +53,7 @@ static void nft_socket_eval(const struct nft_expr *expr, sk = NULL; if (!sk) - switch(nft_pf(pkt)) { - case NFPROTO_IPV4: - sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt)); - break; -#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) - case NFPROTO_IPV6: - sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt)); - break; -#endif - default: - WARN_ON_ONCE(1); - regs->verdict.code = NFT_BREAK; - return; - } + sk = nft_socket_do_lookup(pkt); if (!sk) { regs->verdict.code = NFT_BREAK; @@ -123,6 +136,16 @@ static int nft_socket_dump(struct sk_buff *skb, return 0; } +static int nft_socket_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT)); +} + static struct nft_expr_type nft_socket_type; static const struct nft_expr_ops nft_socket_ops = { .type = &nft_socket_type, @@ -130,6 +153,7 @@ static const struct nft_expr_ops nft_socket_ops = { .eval = nft_socket_eval, .init = nft_socket_init, .dump = nft_socket_dump, + .validate = nft_socket_validate, }; static struct nft_expr_type nft_socket_type __read_mostly = { diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index e2c1fc608841..15abb0e49603 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -191,8 +191,10 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx, if (err) goto nf_ct_failure; err = nf_synproxy_ipv6_init(snet, ctx->net); - if (err) + if (err) { + nf_synproxy_ipv4_fini(snet, ctx->net); goto nf_ct_failure; + } break; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 5e1239cef000..91b35b7c80d8 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -885,6 +885,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len, unsigned char bitmask; unsigned char byte; + if (offset >= bitmap_len) + return -1; byte_offset = offset / 8; byte = bitmap[byte_offset]; bit_spot = offset; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index cb35680db9b2..8aefc52542a0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -148,6 +148,8 @@ static const struct rhashtable_params netlink_rhashtable_params; static inline u32 netlink_group_mask(u32 group) { + if (group > 32) + return 0; return group ? 1 << (group - 1) : 0; } @@ -1862,6 +1864,11 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; + if (len == 0) { + pr_warn_once("Zero length message leads to an empty skb\n"); + return -ENODATA; + } + err = scm_send(sock, msg, &scm, true); if (err < 0) return err; @@ -2246,6 +2253,13 @@ static int netlink_dump(struct sock *sk) * single netdev. The outcome is MSG_TRUNC error. */ skb_reserve(skb, skb_tailroom(skb) - alloc_size); + + /* Make sure malicious BPF programs can not read unitialized memory + * from skb->head -> skb->data + */ + skb_reset_network_header(skb); + skb_reset_mac_header(skb); + netlink_skb_set_owner_r(skb, sk); if (nlk->dump_done_errno > 0) { diff --git a/net/nfc/core.c b/net/nfc/core.c index c5f9c3ee82f8..63701a980ee1 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -38,7 +38,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -94,13 +94,13 @@ int nfc_dev_up(struct nfc_dev *dev) device_lock(&dev->dev); - if (dev->rfkill && rfkill_blocked(dev->rfkill)) { - rc = -ERFKILL; + if (dev->shutting_down) { + rc = -ENODEV; goto error; } - if (!device_is_registered(&dev->dev)) { - rc = -ENODEV; + if (dev->rfkill && rfkill_blocked(dev->rfkill)) { + rc = -ERFKILL; goto error; } @@ -142,7 +142,7 @@ int nfc_dev_down(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -206,7 +206,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -245,7 +245,7 @@ int nfc_stop_poll(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -290,7 +290,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -334,7 +334,7 @@ int nfc_dep_link_down(struct nfc_dev *dev) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -400,7 +400,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -446,7 +446,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -493,7 +493,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; kfree_skb(skb); goto error; @@ -550,7 +550,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -599,7 +599,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) device_lock(&dev->dev); - if (!device_is_registered(&dev->dev)) { + if (dev->shutting_down) { rc = -ENODEV; goto error; } @@ -1118,11 +1118,7 @@ int nfc_register_device(struct nfc_dev *dev) if (rc) pr_err("Could not register llcp device\n"); - rc = nfc_genl_device_added(dev); - if (rc) - pr_debug("The userspace won't be notified that the device %s was added\n", - dev_name(&dev->dev)); - + device_lock(&dev->dev); dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev, RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev); if (dev->rfkill) { @@ -1131,6 +1127,13 @@ int nfc_register_device(struct nfc_dev *dev) dev->rfkill = NULL; } } + dev->shutting_down = false; + device_unlock(&dev->dev); + + rc = nfc_genl_device_added(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s was added\n", + dev_name(&dev->dev)); return 0; } @@ -1147,24 +1150,24 @@ void nfc_unregister_device(struct nfc_dev *dev) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); + rc = nfc_genl_device_removed(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s " + "was removed\n", dev_name(&dev->dev)); + + device_lock(&dev->dev); if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); } + dev->shutting_down = true; + device_unlock(&dev->dev); if (dev->ops->check_presence) { - device_lock(&dev->dev); - dev->shutting_down = true; - device_unlock(&dev->dev); del_timer_sync(&dev->check_pres_timer); cancel_work_sync(&dev->check_pres_work); } - rc = nfc_genl_device_removed(dev); - if (rc) - pr_debug("The userspace won't be notified that the device %s " - "was removed\n", dev_name(&dev->dev)); - nfc_llcp_unregister_device(dev); mutex_lock(&nfc_devlist_mutex); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index a7e861eede2d..bd2174699af9 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -789,6 +789,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); + if (!llcp_sock->local) { + release_sock(sk); + return -ENODEV; + } + if (sk->sk_type == SOCK_DGRAM) { DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr, msg->msg_name); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6a34a0a786ea..b2e922fcc70d 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -144,12 +144,15 @@ inline int nci_request(struct nci_dev *ndev, { int rc; - if (!test_bit(NCI_UP, &ndev->flags)) - return -ENETDOWN; - /* Serialize all requests */ mutex_lock(&ndev->req_lock); - rc = __nci_request(ndev, req, opt, timeout); + /* check the state after obtaing the lock against any races + * from nci_close_device when the device gets removed. + */ + if (test_bit(NCI_UP, &ndev->flags)) + rc = __nci_request(ndev, req, opt, timeout); + else + rc = -ENETDOWN; mutex_unlock(&ndev->req_lock); return rc; @@ -470,6 +473,11 @@ static int nci_open_device(struct nci_dev *ndev) mutex_lock(&ndev->req_lock); + if (test_bit(NCI_UNREG, &ndev->flags)) { + rc = -ENODEV; + goto done; + } + if (test_bit(NCI_UP, &ndev->flags)) { rc = -EALREADY; goto done; @@ -533,9 +541,17 @@ done: static int nci_close_device(struct nci_dev *ndev) { nci_req_cancel(ndev, ENODEV); + + /* This mutex needs to be held as a barrier for + * caller nci_unregister_device + */ mutex_lock(&ndev->req_lock); if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { + /* Need to flush the cmd wq in case + * there is a queued/running cmd_work + */ + flush_workqueue(ndev->cmd_wq); del_timer_sync(&ndev->cmd_timer); del_timer_sync(&ndev->data_timer); mutex_unlock(&ndev->req_lock); @@ -570,8 +586,8 @@ static int nci_close_device(struct nci_dev *ndev) /* Flush cmd wq */ flush_workqueue(ndev->cmd_wq); - /* Clear flags */ - ndev->flags = 0; + /* Clear flags except NCI_UNREG */ + ndev->flags &= BIT(NCI_UNREG); mutex_unlock(&ndev->req_lock); @@ -1253,6 +1269,12 @@ void nci_unregister_device(struct nci_dev *ndev) { struct nci_conn_info *conn_info, *n; + /* This set_bit is not protected with specialized barrier, + * However, it is fine because the mutex_lock(&ndev->req_lock); + * in nci_close_device() will help to emit one. + */ + set_bit(NCI_UNREG, &ndev->flags); + nci_close_device(ndev); destroy_workqueue(ndev->cmd_wq); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 99b06a16b808..9e94f732e717 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -644,8 +644,10 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_device_iter_exit(iter); - kfree(iter); + if (iter) { + nfc_device_iter_exit(iter); + kfree(iter); + } return 0; } @@ -1250,7 +1252,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -1266,7 +1268,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name, genlmsg_end(msg, hdr); - genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC); return 0; @@ -1400,8 +1402,10 @@ static int nfc_genl_dump_ses_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_device_iter_exit(iter); - kfree(iter); + if (iter) { + nfc_device_iter_exit(iter); + kfree(iter); + } return 0; } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 5c68f9ea9881..ae40593daf21 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -427,12 +427,43 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) +static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask) { + u8 old_ipv6_tclass = ipv6_get_dsfield(nh); + + ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12), + (__force __wsum)(ipv6_tclass << 12)); + + ipv6_change_dsfield(nh, ~mask, ipv6_tclass); +} + +static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask) +{ + u32 ofl; + + ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2]; + fl = OVS_MASKED(ofl, fl, mask); + /* Bits 21-24 are always unmasked, so this retains their values. */ - OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); - OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); - OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); + nh->flow_lbl[0] = (u8)(fl >> 16); + nh->flow_lbl[1] = (u8)(fl >> 8); + nh->flow_lbl[2] = (u8)fl; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl)); +} + +static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask) +{ + new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8), + (__force __wsum)(new_ttl << 8)); + nh->hop_limit = new_ttl; } static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, @@ -550,18 +581,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, } } if (mask->ipv6_tclass) { - ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); + set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass); flow_key->ip.tos = ipv6_get_dsfield(nh); } if (mask->ipv6_label) { - set_ipv6_fl(nh, ntohl(key->ipv6_label), + set_ipv6_fl(skb, nh, ntohl(key->ipv6_label), ntohl(mask->ipv6_label)); flow_key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); } if (mask->ipv6_hlimit) { - OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit, - mask->ipv6_hlimit); + set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit); flow_key->ip.ttl = nh->hop_limit; } return 0; @@ -1007,7 +1037,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb, int rem = nla_len(attr); bool dont_clone_flow_key; - /* The first action is always 'OVS_CLONE_ATTR_ARG'. */ + /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */ clone_arg = nla_data(attr); dont_clone_flow_key = nla_get_u32(clone_arg); actions = nla_next(clone_arg, &rem); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index b6f98eba71f1..816036b9c223 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -730,6 +730,57 @@ static bool skb_nfct_cached(struct net *net, } #if IS_ENABLED(CONFIG_NF_NAT) +static void ovs_nat_update_key(struct sw_flow_key *key, + const struct sk_buff *skb, + enum nf_nat_manip_type maniptype) +{ + if (maniptype == NF_NAT_MANIP_SRC) { + __be16 src; + + key->ct_state |= OVS_CS_F_SRC_NAT; + if (key->eth.type == htons(ETH_P_IP)) + key->ipv4.addr.src = ip_hdr(skb)->saddr; + else if (key->eth.type == htons(ETH_P_IPV6)) + memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, + sizeof(key->ipv6.addr.src)); + else + return; + + if (key->ip.proto == IPPROTO_UDP) + src = udp_hdr(skb)->source; + else if (key->ip.proto == IPPROTO_TCP) + src = tcp_hdr(skb)->source; + else if (key->ip.proto == IPPROTO_SCTP) + src = sctp_hdr(skb)->source; + else + return; + + key->tp.src = src; + } else { + __be16 dst; + + key->ct_state |= OVS_CS_F_DST_NAT; + if (key->eth.type == htons(ETH_P_IP)) + key->ipv4.addr.dst = ip_hdr(skb)->daddr; + else if (key->eth.type == htons(ETH_P_IPV6)) + memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, + sizeof(key->ipv6.addr.dst)); + else + return; + + if (key->ip.proto == IPPROTO_UDP) + dst = udp_hdr(skb)->dest; + else if (key->ip.proto == IPPROTO_TCP) + dst = tcp_hdr(skb)->dest; + else if (key->ip.proto == IPPROTO_SCTP) + dst = sctp_hdr(skb)->dest; + else + return; + + key->tp.dst = dst; + } +} + /* Modelled after nf_nat_ipv[46]_fn(). * range is only used for new, uninitialized NAT state. * Returns either NF_ACCEPT or NF_DROP. @@ -737,7 +788,7 @@ static bool skb_nfct_cached(struct net *net, static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype) + enum nf_nat_manip_type maniptype, struct sw_flow_key *key) { int hooknum, nh_off, err = NF_ACCEPT; @@ -810,58 +861,11 @@ push: skb_push(skb, nh_off); skb_postpush_rcsum(skb, skb->data, nh_off); - return err; -} - -static void ovs_nat_update_key(struct sw_flow_key *key, - const struct sk_buff *skb, - enum nf_nat_manip_type maniptype) -{ - if (maniptype == NF_NAT_MANIP_SRC) { - __be16 src; - - key->ct_state |= OVS_CS_F_SRC_NAT; - if (key->eth.type == htons(ETH_P_IP)) - key->ipv4.addr.src = ip_hdr(skb)->saddr; - else if (key->eth.type == htons(ETH_P_IPV6)) - memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, - sizeof(key->ipv6.addr.src)); - else - return; - - if (key->ip.proto == IPPROTO_UDP) - src = udp_hdr(skb)->source; - else if (key->ip.proto == IPPROTO_TCP) - src = tcp_hdr(skb)->source; - else if (key->ip.proto == IPPROTO_SCTP) - src = sctp_hdr(skb)->source; - else - return; - - key->tp.src = src; - } else { - __be16 dst; - - key->ct_state |= OVS_CS_F_DST_NAT; - if (key->eth.type == htons(ETH_P_IP)) - key->ipv4.addr.dst = ip_hdr(skb)->daddr; - else if (key->eth.type == htons(ETH_P_IPV6)) - memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, - sizeof(key->ipv6.addr.dst)); - else - return; - - if (key->ip.proto == IPPROTO_UDP) - dst = udp_hdr(skb)->dest; - else if (key->ip.proto == IPPROTO_TCP) - dst = tcp_hdr(skb)->dest; - else if (key->ip.proto == IPPROTO_SCTP) - dst = sctp_hdr(skb)->dest; - else - return; + /* Update the flow key if NAT successful. */ + if (err == NF_ACCEPT) + ovs_nat_update_key(key, skb, maniptype); - key->tp.dst = dst; - } + return err; } /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ @@ -903,7 +907,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } else { return NF_ACCEPT; /* Connection is not NATed. */ } - err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key); if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { if (ct->status & IPS_SRC_NAT) { @@ -913,17 +917,13 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, maniptype = NF_NAT_MANIP_SRC; err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, - maniptype); + maniptype, key); } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL, - NF_NAT_MANIP_SRC); + NF_NAT_MANIP_SRC, key); } } - /* Mark NAT done if successful and update the flow key. */ - if (err == NF_ACCEPT) - ovs_nat_update_key(key, skb, maniptype); - return err; } #else /* !CONFIG_NF_NAT */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d7559c64795d..8461de79f67b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2179,8 +2179,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, icmpv6_key->icmpv6_type = ntohs(output->tp.src); icmpv6_key->icmpv6_code = ntohs(output->tp.dst); - if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || - icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { + if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || + swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { struct ovs_key_nd *nd_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); @@ -2329,7 +2329,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { + if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { OVS_NLERR(log, "Flow action size exceeds max %u", MAX_ACTIONS_BUFSIZE); return ERR_PTR(-EMSGSIZE); @@ -3284,7 +3284,9 @@ static int clone_action_to_attr(const struct nlattr *attr, if (!start) return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(attr), rem, skb); + /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */ + attr = nla_next(nla_data(attr), &rem); + err = ovs_nla_put_actions(attr, rem, skb); if (err) nla_nest_cancel(skb, start); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0ffbf3d17911..a2696acbcd9d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1715,6 +1715,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; list_add(&match->list, &fanout_list); } @@ -1728,7 +1729,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) err = -ENOSPC; if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { __dev_remove_pack(&po->prot_hook); - po->fanout = match; + + /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ + WRITE_ONCE(po->fanout, match); + po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); @@ -2253,8 +2257,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, copy_skb = skb_get(skb); skb_head = skb->data; } - if (copy_skb) + if (copy_skb) { + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); + } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { @@ -2784,8 +2791,9 @@ tpacket_error: status = TP_STATUS_SEND_REQUEST; err = po->xmit(skb); - if (unlikely(err > 0)) { - err = net_xmit_errno(err); + if (unlikely(err != 0)) { + if (err > 0) + err = net_xmit_errno(err); if (err && __packet_get_status(po, ph) == TP_STATUS_AVAILABLE) { /* skb was destructed already */ @@ -2986,8 +2994,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->no_fcs = 1; err = po->xmit(skb); - if (err > 0 && (err = net_xmit_errno(err)) != 0) - goto out_unlock; + if (unlikely(err != 0)) { + if (err > 0) + err = net_xmit_errno(err); + if (err) + goto out_unlock; + } dev_put(dev); @@ -3294,6 +3306,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; + po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; @@ -3400,6 +3413,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + const size_t max_len = min(sizeof(skb->cb), + sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled @@ -3422,6 +3437,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(struct sockaddr_ll); } } + if (WARN_ON_ONCE(copy_len > max_len)) { + copy_len = max_len; + msg->msg_namelen = copy_len; + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } @@ -3874,7 +3893,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv } case PACKET_FANOUT_DATA: { - if (!po->fanout) + /* Paired with the WRITE_ONCE() in fanout_add() */ + if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); @@ -4453,9 +4473,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } out_free_pg_vec: - bitmap_free(rx_owner_map); - if (pg_vec) + if (pg_vec) { + bitmap_free(rx_owner_map); free_pg_vec(pg_vec, order, req->tp_block_nr); + } out: return err; } diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 4577e43cb777..0c5d0f7b8b4b 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -868,6 +868,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, err = pep_accept_conn(newsk, skb); if (err) { + __sock_put(sk); sock_put(newsk); newsk = NULL; goto drop; @@ -946,6 +947,8 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) ret = -EBUSY; else if (sk->sk_state == TCP_ESTABLISHED) ret = -EISCONN; + else if (!pn->pn_sk.sobject) + ret = -EADDRNOTAVAIL; else ret = pep_sock_enable(sk, NULL, 0); release_sock(sk); diff --git a/net/rds/connection.c b/net/rds/connection.c index c85bd6340eaa..92ff40e7a66c 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -253,6 +253,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, * should end up here, but if it * does, reset/destroy the connection. */ + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = ERR_PTR(-EOPNOTSUPP); goto out; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 1402e9166a7e..d55d81b01d37 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -510,7 +510,7 @@ void rds_tcp_tune(struct socket *sock) sk->sk_userlocks |= SOCK_SNDBUF_LOCK; } if (rtn->rcvbuf_size > 0) { - sk->sk_sndbuf = rtn->rcvbuf_size; + sk->sk_rcvbuf = rtn->rcvbuf_size; sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } release_sock(sk); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 9ff85ee8337c..80e15310f1b2 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct sk_buff *skb; - unsigned long resend_at, rto_j; + unsigned long resend_at; rxrpc_seq_t cursor, seq, top; ktime_t now, max_age, oldest, ack_ts; int ix; @@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - rto_j = call->peer->rto_j; - now = ktime_get_real(); - max_age = ktime_sub(now, jiffies_to_usecs(rto_j)); + max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j)); spin_lock_bh(&call->lock); @@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rto_j; + resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans); WRITE_ONCE(call->resend_at, resend_at); if (unacked) diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index b312aab80fed..91a503871116 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -118,6 +118,8 @@ static __net_exit void rxrpc_exit_net(struct net *net) rxnet->live = false; del_timer_sync(&rxnet->peer_keepalive_timer); cancel_work_sync(&rxnet->peer_keepalive_work); + /* Remove the timer again as the worker may have restarted it. */ + del_timer_sync(&rxnet->peer_keepalive_timer); rxrpc_destroy_all_calls(rxnet); rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_peers(rxnet); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f8b632a5c619..a4a6f8ee0720 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -426,7 +426,7 @@ done: if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); + ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); ack_lost_at += nowj; WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index e011594adcd1..23d0bc4ca319 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -297,6 +297,12 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, return peer; } +static void rxrpc_free_peer(struct rxrpc_peer *peer) +{ + rxrpc_put_local(peer->local); + kfree_rcu(peer, rcu); +} + /* * Set up a new incoming peer. There shouldn't be any other matching peers * since we've already done a search in the list from the non-reentrant context @@ -363,7 +369,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) - kfree(candidate); + rxrpc_free_peer(candidate); else peer = candidate; } @@ -418,8 +424,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) list_del_init(&peer->keepalive_link); spin_unlock_bh(&rxnet->peer_hash_lock); - rxrpc_put_local(peer->local); - kfree_rcu(peer, rcu); + rxrpc_free_peer(peer); } /* @@ -455,8 +460,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer) if (n == 0) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - rxrpc_put_local(peer->local); - kfree_rcu(peer, rcu); + rxrpc_free_peer(peer); } } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 75132d0ca887..ab277ee95032 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -652,15 +652,24 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, restart_act_graph: for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; + int repeat_ttl; if (jmp_prgcnt > 0) { jmp_prgcnt -= 1; continue; } + + repeat_ttl = 32; repeat: ret = a->ops->act(skb, a, res); - if (ret == TC_ACT_REPEAT) - goto repeat; /* we need a ttl - JHS */ + + if (unlikely(ret == TC_ACT_REPEAT)) { + if (--repeat_ttl != 0) + goto repeat; + /* suspicious opcode, stop pipeline */ + net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n"); + return TC_ACT_OK; + } if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e3ff884a48c5..b87d2a1ee0b1 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -19,6 +19,7 @@ #include <linux/if_arp.h> #include <net/net_namespace.h> #include <net/netlink.h> +#include <net/dst.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <linux/tc_act/tc_mirred.h> @@ -218,6 +219,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, bool want_ingress; bool is_redirect; bool expects_nh; + bool at_ingress; int m_eaction; int mac_len; bool at_nh; @@ -253,7 +255,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, * ingress - that covers the TC S/W datapath. */ is_redirect = tcf_mirred_is_act_redirect(m_eaction); - use_reinsert = skb_at_tc_ingress(skb) && is_redirect && + at_ingress = skb_at_tc_ingress(skb); + use_reinsert = at_ingress && is_redirect && tcf_mirred_can_reinsert(retval); if (!use_reinsert) { skb2 = skb_clone(skb, GFP_ATOMIC); @@ -261,10 +264,12 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, goto out; } + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + /* All mirred/redirected skbs should clear previous ct info */ nf_reset_ct(skb2); - - want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ + skb_dst_drop(skb2); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 74450b0f69fc..214f4efdd992 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -265,14 +265,12 @@ tcf_sample_get_group(const struct tc_action *a, struct tcf_sample *s = to_sample(a); struct psample_group *group; - spin_lock_bh(&s->tcf_lock); group = rcu_dereference_protected(s->psample_group, lockdep_is_held(&s->tcf_lock)); if (group) { psample_group_take(group); *destructor = tcf_psample_group_put; } - spin_unlock_bh(&s->tcf_lock); return group; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7f20fd37e01e..919c7fa5f02d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1639,10 +1639,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain, if (chain->flushing) return -EAGAIN; + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); tcf_proto_get(tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); return 0; @@ -1928,9 +1928,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, bool prio_allocate; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; + struct Qdisc *q; struct tcf_chain_info chain_info; - struct tcf_chain *chain = NULL; + struct tcf_chain *chain; struct tcf_block *block; struct tcf_proto *tp; unsigned long cl; @@ -1958,6 +1958,8 @@ replay: tp = NULL; cl = 0; block = NULL; + q = NULL; + chain = NULL; if (prio == 0) { /* If no priority is provided by the user, @@ -2764,8 +2766,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, struct tcmsg *t; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; - struct tcf_chain *chain = NULL; + struct Qdisc *q; + struct tcf_chain *chain; struct tcf_block *block; unsigned long cl; int err; @@ -2775,6 +2777,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: + q = NULL; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -3436,7 +3439,7 @@ static void tcf_sample_get_group(struct flow_action_entry *entry, int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts, bool rtnl_held) { - const struct tc_action *act; + struct tc_action *act; int i, j, k, err = 0; if (!exts) @@ -3450,6 +3453,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, struct flow_action_entry *entry; entry = &flow_action->entries[j]; + spin_lock_bh(&act->tcfa_lock); if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; } else if (is_tcf_gact_shot(act)) { @@ -3490,13 +3494,13 @@ int tc_setup_flow_action(struct flow_action *flow_action, break; default: err = -EOPNOTSUPP; - goto err_out; + goto err_out_locked; } } else if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; err = tcf_tunnel_encap_get_tunnel(entry, act); if (err) - goto err_out; + goto err_out_locked; } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else if (is_tcf_pedit(act)) { @@ -3510,7 +3514,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, break; default: err = -EOPNOTSUPP; - goto err_out; + goto err_out_locked; } entry->mangle.htype = tcf_pedit_htype(act, k); entry->mangle.mask = tcf_pedit_mask(act, k); @@ -3561,15 +3565,17 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mpls_mangle.ttl = tcf_mpls_ttl(act); break; default: - goto err_out; + err = -EOPNOTSUPP; + goto err_out_locked; } } else if (is_tcf_skbedit_ptype(act)) { entry->id = FLOW_ACTION_PTYPE; entry->ptype = tcf_skbedit_ptype(act); } else { err = -EOPNOTSUPP; - goto err_out; + goto err_out_locked; } + spin_unlock_bh(&act->tcfa_lock); if (!is_tcf_pedit(act)) j++; @@ -3583,6 +3589,9 @@ err_out: tc_cleanup_flow_action(flow_action); return err; +err_out_locked: + spin_unlock_bh(&act->tcfa_lock); + goto err_out; } EXPORT_SYMBOL(tc_setup_flow_action); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 26979b4853bd..007fbc199352 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -784,6 +784,7 @@ static int fl_set_key_mpls(struct nlattr **tb, static void fl_set_key_vlan(struct nlattr **tb, __be16 ethertype, int vlan_id_key, int vlan_prio_key, + int vlan_next_eth_type_key, struct flow_dissector_key_vlan *key_val, struct flow_dissector_key_vlan *key_mask) { @@ -802,6 +803,11 @@ static void fl_set_key_vlan(struct nlattr **tb, } key_val->vlan_tpid = ethertype; key_mask->vlan_tpid = cpu_to_be16(~0); + if (tb[vlan_next_eth_type_key]) { + key_val->vlan_eth_type = + nla_get_be16(tb[vlan_next_eth_type_key]); + key_mask->vlan_eth_type = cpu_to_be16(~0); + } } static void fl_set_key_flag(u32 flower_key, u32 flower_mask, @@ -1076,8 +1082,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (eth_type_vlan(ethertype)) { fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID, - TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, - &mask->vlan); + TCA_FLOWER_KEY_VLAN_PRIO, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &key->vlan, &mask->vlan); if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]); @@ -1085,6 +1092,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_CVLAN_ID, TCA_FLOWER_KEY_CVLAN_PRIO, + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, &key->cvlan, &mask->cvlan); fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, @@ -2272,13 +2280,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (mask->basic.n_proto) { - if (mask->cvlan.vlan_tpid) { + if (mask->cvlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, key->basic.n_proto)) goto nla_put_failure; - } else if (mask->vlan.vlan_tpid) { + } else if (mask->vlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, - key->basic.n_proto)) + key->vlan.vlan_eth_type)) goto nla_put_failure; } } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index e15ff335953d..ed8d26e6468c 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -386,14 +386,19 @@ static int u32_init(struct tcf_proto *tp) return 0; } -static int u32_destroy_key(struct tc_u_knode *n, bool free_pf) +static void __u32_destroy_key(struct tc_u_knode *n) { struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); tcf_exts_destroy(&n->exts); - tcf_exts_put_net(&n->exts); if (ht && --ht->refcnt == 0) kfree(ht); + kfree(n); +} + +static void u32_destroy_key(struct tc_u_knode *n, bool free_pf) +{ + tcf_exts_put_net(&n->exts); #ifdef CONFIG_CLS_U32_PERF if (free_pf) free_percpu(n->pf); @@ -402,8 +407,7 @@ static int u32_destroy_key(struct tc_u_knode *n, bool free_pf) if (free_pf) free_percpu(n->pcpu_success); #endif - kfree(n); - return 0; + __u32_destroy_key(n); } /* u32_delete_key_rcu should be called when free'ing a copied @@ -812,10 +816,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, new->flags = n->flags; RCU_INIT_POINTER(new->ht_down, ht); - /* bump reference count as long as we hold pointer to structure */ - if (ht) - ht->refcnt++; - #ifdef CONFIG_CLS_U32_PERF /* Statistics may be incremented by readers during update * so we must keep them in tact. When the node is later destroyed @@ -837,6 +837,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, return NULL; } + /* bump reference count as long as we hold pointer to structure */ + if (ht) + ht->refcnt++; + return new; } @@ -903,13 +907,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, tca[TCA_RATE], ovr, extack); if (err) { - u32_destroy_key(new, false); + __u32_destroy_key(new); return err; } err = u32_replace_hw_knode(tp, new, flags, extack); if (err) { - u32_destroy_key(new, false); + __u32_destroy_key(new); return err; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e70f99033408..6f36df85d23d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1195,7 +1195,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, err = -ENOENT; if (!ops) { - NL_SET_ERR_MSG(extack, "Specified qdisc not found"); + NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index e8eebe40e0ae..0eb4d4a568f7 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2724,7 +2724,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), GFP_KERNEL); if (!q->tins) - goto nomem; + return -ENOMEM; for (i = 0; i < CAKE_MAX_TINS; i++) { struct cake_tin_data *b = q->tins + i; @@ -2754,10 +2754,6 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->min_netlen = ~0; q->min_adjlen = ~0; return 0; - -nomem: - cake_destroy(sch); - return -ENOMEM; } static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a062cf0977a7..a6795e2a2ac7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1403,6 +1403,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, { memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; + r->mpu = conf->mpu; r->rate_bytes_ps = max_t(u64, conf->rate, rate64); r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index b046fd3cac2c..1eb339d224ae 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1421,10 +1421,8 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; - if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES) - max_classes = QFQ_MAX_AGG_CLASSES; - else - max_classes = qdisc_dev(sch)->tx_queue_len + 1; + max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1, + QFQ_MAX_AGG_CLASSES); /* max_cl_shift = floor(log_2(max_classes)) */ max_cl_shift = __fls(max_classes); q->max_agg_classes = 1<<max_cl_shift; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b268e6130451..4c26f7fb32b3 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -427,7 +427,8 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(!child)) return qdisc_drop(skb, sch, to_free); - if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) { + /* sk_flags are only safe to use on full sockets. */ + if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { if (!is_valid_interval(skb, sch)) return qdisc_drop(skb, sch, to_free); } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { diff --git a/net/sctp/diag.c b/net/sctp/diag.c index ba9f64fdfd23..5a918e74bb82 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -61,10 +61,6 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; r->idiag_retrans = asoc->rtx_data_chunks; r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies); - } else { - r->idiag_timer = 0; - r->idiag_retrans = 0; - r->idiag_expires = 0; } } @@ -144,13 +140,14 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, r = nlmsg_data(nlh); BUG_ON(!sk_fullsock(sk)); + r->idiag_timer = 0; + r->idiag_retrans = 0; + r->idiag_expires = 0; if (asoc) { inet_diag_msg_sctpasoc_fill(r, sk, asoc); } else { inet_diag_msg_common_fill(r, sk); r->idiag_state = sk->sk_state; - r->idiag_timer = 0; - r->idiag_retrans = 0; } if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) @@ -292,9 +289,8 @@ out: return err; } -static int sctp_sock_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; @@ -304,6 +300,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) int err = 0; lock_sock(sk); + if (ep != tsp->asoc->ep) + goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -346,9 +344,8 @@ release: return err; } -static int sctp_sock_filter(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; @@ -506,8 +503,8 @@ skip: if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, - net, &pos, &commp); + sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump, + net, &pos, &commp); cb->args[2] = pos; done: diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 3067deb0fbec..665a22d5c725 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) } /* Final destructor for endpoint. */ +static void sctp_endpoint_destroy_rcu(struct rcu_head *head) +{ + struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); + struct sock *sk = ep->base.sk; + + sctp_sk(sk)->ep = NULL; + sock_put(sk); + + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); +} + static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; @@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); - sctp_sk(sk)->ep = NULL; - /* Give up our hold on the sock */ - sock_put(sk); - - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); + call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); } /* Hold a reference to an endpoint. */ -void sctp_endpoint_hold(struct sctp_endpoint *ep) +int sctp_endpoint_hold(struct sctp_endpoint *ep) { - refcount_inc(&ep->base.refcnt); + return refcount_inc_not_zero(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 0d225f891b61..8d32229199b9 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t) goto out_unlock; } + /* This happens when the response arrives after the timer is triggered. */ + if (!asoc->strreset_chunk) + goto out_unlock; + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF), asoc->state, asoc->ep, asoc, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7c6dcbc8e98b..1d2f633c6c7c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -149,6 +149,12 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( void *arg, struct sctp_cmd_seq *commands); +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands); + /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument * is set to be the size of a specific chunk we are testing. @@ -330,6 +336,14 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. + * Normally, this would cause an ABORT with a Protocol Violation + * error, but since we don't have an association, we'll + * just discard the packet. + */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -344,14 +358,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * Normally, this would cause an ABORT with a Protocol Violation - * error, but since we don't have an association, we'll - * just discard the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* If the INIT is coming toward a closing socket, we'll send back * and ABORT. Essentially, this catches the race of INIT being * backloged to the socket at the same time as the user isses close(). @@ -1484,19 +1490,16 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * In this case, we generate a protocol violation since we have - * an association established. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; @@ -1814,9 +1817,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( * its peer. */ if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) { - disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc, - SCTP_ST_CHUNK(chunk->chunk_hdr->type), - chunk, commands); + disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc, + SCTP_ST_CHUNK(chunk->chunk_hdr->type), + chunk, commands); if (SCTP_DISPOSITION_NOMEM == disposition) goto nomem; @@ -2915,13 +2918,11 @@ enum sctp_disposition sctp_sf_do_9_2_shut_ctsn( * that belong to this association, it should discard the INIT chunk and * retransmit the SHUTDOWN ACK chunk. */ -enum sctp_disposition sctp_sf_do_9_2_reshutack( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *commands) +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; @@ -2955,6 +2956,26 @@ nomem: return SCTP_DISPOSITION_NOMEM; } +enum sctp_disposition +sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *chunk = arg; + + if (!chunk->singleton) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (chunk->sctp_hdr->vtag != 0) + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); + + return __sctp_sf_do_9_2_reshutack(net, ep, asoc, type, arg, commands); +} + /* * sctp_sf_do_ecn_cwr * diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2146372adff4..c76b40322ac7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5395,11 +5395,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p) { +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p) +{ struct rhashtable_iter hti; struct sctp_transport *tsp; + struct sctp_endpoint *ep; int ret; again: @@ -5408,26 +5409,32 @@ again: tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { - ret = cb(tsp, p); - if (ret) - break; + ep = tsp->asoc->ep; + if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ + ret = cb(ep, tsp, p); + if (ret) + break; + sctp_endpoint_put(ep); + } (*pos)++; sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); if (ret) { - if (cb_done && !cb_done(tsp, p)) { + if (cb_done && !cb_done(ep, tsp, p)) { (*pos)++; + sctp_endpoint_put(ep); sctp_transport_put(tsp); goto again; } + sctp_endpoint_put(ep); sctp_transport_put(tsp); } return ret; } -EXPORT_SYMBOL_GPL(sctp_for_each_transport); +EXPORT_SYMBOL_GPL(sctp_transport_traverse_process); /* 7.2.1 Association Status (SCTP_STATUS) @@ -5675,7 +5682,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) * Set the daddr and initialize id to something more random and also * copy over any ip options. */ - sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk); + sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk); sp->pf->copy_ip_options(sk, sock->sk); /* Populate the fields of the newsk from the oldsk and migrate the diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6b0f09c5b195..a5a8cca46bd5 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -183,7 +183,9 @@ static int smc_release(struct socket *sock) /* cleanup for a dangling non-blocking connect */ if (smc->connect_nonblock && sk->sk_state == SMC_INIT) tcp_abort(smc->clcsock->sk, ECONNABORTED); - flush_work(&smc->connect_work); + + if (cancel_work_sync(&smc->connect_work)) + sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */ if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires @@ -467,12 +469,26 @@ static void smc_link_save_peer_info(struct smc_link *link, static void smc_switch_to_fallback(struct smc_sock *smc) { + wait_queue_head_t *smc_wait = sk_sleep(&smc->sk); + wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk); + unsigned long flags; + smc->use_fallback = true; if (smc->sk.sk_socket && smc->sk.sk_socket->file) { smc->clcsock->file = smc->sk.sk_socket->file; smc->clcsock->file->private_data = smc->clcsock; smc->clcsock->wq.fasync_list = smc->sk.sk_socket->wq.fasync_list; + + /* There may be some entries remaining in + * smc socket->wq, which should be removed + * to clcsocket->wq during the fallback. + */ + spin_lock_irqsave(&smc_wait->lock, flags); + spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING); + list_splice_init(&smc_wait->head, &clc_wait->head); + spin_unlock(&clc_wait->lock); + spin_unlock_irqrestore(&smc_wait->lock, flags); } } @@ -800,6 +816,8 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_state = SMC_CLOSED; if (rc == -EPIPE || rc == -EAGAIN) smc->sk.sk_err = EPIPE; + else if (rc == -ECONNREFUSED) + smc->sk.sk_err = ECONNREFUSED; else if (signal_pending(current)) smc->sk.sk_err = -sock_intr_errno(timeo); sock_put(&smc->sk); /* passive closing */ @@ -1658,8 +1676,10 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, static int smc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; + bool do_shutdown = true; struct smc_sock *smc; int rc = -EINVAL; + int old_state; int rc1 = 0; smc = smc_sk(sk); @@ -1680,13 +1700,19 @@ static int smc_shutdown(struct socket *sock, int how) if (smc->use_fallback) { rc = kernel_sock_shutdown(smc->clcsock, how); sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; - if (sk->sk_shutdown == SHUTDOWN_MASK) + if (sk->sk_shutdown == SHUTDOWN_MASK) { sk->sk_state = SMC_CLOSED; + sock_put(sk); + } goto out; } switch (how) { case SHUT_RDWR: /* shutdown in both directions */ + old_state = sk->sk_state; rc = smc_close_active(smc); + if (old_state == SMC_ACTIVE && + sk->sk_state == SMC_PEERCLOSEWAIT1) + do_shutdown = false; break; case SHUT_WR: rc = smc_close_shutdown_write(smc); @@ -1696,7 +1722,7 @@ static int smc_shutdown(struct socket *sock, int how) /* nothing more to do because peer is not involved */ break; } - if (smc->clcsock) + if (do_shutdown && smc->clcsock) rc1 = kernel_sock_shutdown(smc->clcsock, how); /* map sock_shutdown_cmd constants to sk_shutdown value range */ sk->sk_shutdown |= how + 1; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index fc06720b53c1..543948d970c5 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -183,6 +183,7 @@ int smc_close_active(struct smc_sock *smc) int old_state; long timeout; int rc = 0; + int rc1 = 0; timeout = current->flags & PF_EXITING ? 0 : sock_flag(sk, SOCK_LINGER) ? @@ -218,6 +219,15 @@ again: if (rc) break; sk->sk_state = SMC_PEERCLOSEWAIT1; + + /* actively shutdown clcsock before peer close it, + * prevent peer from entering TIME_WAIT state. + */ + if (smc->clcsock && smc->clcsock->sk) { + rc1 = kernel_sock_shutdown(smc->clcsock, + SHUT_RDWR); + rc = rc ? rc : rc1; + } } else { /* peer event has changed the state */ goto again; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index aeea67f90841..66cdfd5725ac 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -342,8 +342,8 @@ void smc_conn_free(struct smc_connection *conn) } else { smc_cdc_tx_dismiss_slots(conn); } - smc_lgr_unregister_conn(conn); smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + smc_lgr_unregister_conn(conn); conn->lgr = NULL; if (!lgr->conns_num) @@ -632,7 +632,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) !lgr->sync_err && lgr->vlan_id == ini->vlan_id && (role == SMC_CLNT || - lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { + (lgr->conns_num < SMC_RMBS_PER_LGR_MAX && + !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) { /* link group found */ ini->cln_first_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; @@ -733,7 +734,7 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, */ static inline int smc_rmb_wnd_update_limit(int rmbe_size) { - return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); + return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 571e6d84da3b..660608202f28 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -295,8 +295,9 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, sizeof(ibdev->ibdev->name)) || - !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, - IB_DEVICE_NAME_MAX - 1)) { + (ibdev->ibdev->dev.parent && + !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, + IB_DEVICE_NAME_MAX - 1))) { goto out; } } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b6039642df67..08e1ccc01e98 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2223,6 +2223,7 @@ call_transmit_status(struct rpc_task *task) * socket just returned a connection error, * then hold onto the transport lock. */ + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); /* fall through */ @@ -2308,6 +2309,7 @@ call_bc_transmit_status(struct rpc_task *task) case -ENOTCONN: case -EPIPE: break; + case -ENOMEM: case -ENOBUFS: rpc_delay(task, HZ>>2); /* fall through */ @@ -2392,6 +2394,11 @@ call_status(struct rpc_task *task) case -EPIPE: case -EAGAIN: break; + case -ENFILE: + case -ENOBUFS: + case -ENOMEM: + rpc_delay(task, HZ>>2); + break; case -EIO: /* shutdown or soft timeout */ goto out_exit; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 37792675ed57..3b825942e2f6 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -599,9 +599,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_rmdir(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } @@ -612,9 +612,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_unlink(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_unlink(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 8fc4a6b3422f..32ffa801a5b9 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1039,8 +1039,10 @@ int rpc_malloc(struct rpc_task *task) struct rpc_buffer *buf; gfp_t gfp = GFP_NOFS; + if (RPC_IS_ASYNC(task)) + gfp = GFP_NOWAIT | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) - gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + gfp |= __GFP_MEMALLOC; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 93b6afd28405..8ac579778e48 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -2006,7 +2006,14 @@ static void xprt_destroy(struct rpc_xprt *xprt) */ wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); + /* + * xprt_schedule_autodisconnect() can run after XPRT_LOCKED + * is cleared. We use ->transport_lock to ensure the mod_timer() + * can only run *before* del_time_sync(), never after. + */ + spin_lock(&xprt->transport_lock); del_timer_sync(&xprt->timer); + spin_unlock(&xprt->transport_lock); /* * Destroy sockets etc from the system workqueue so they can diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2f21e3c52bfc..866bcd99bdc0 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -626,8 +626,10 @@ xprt_rdma_allocate(struct rpc_task *task) gfp_t flags; flags = RPCRDMA_DEF_GFP; + if (RPC_IS_ASYNC(task)) + flags = GFP_NOWAIT | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) - flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + flags |= __GFP_MEMALLOC; if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, flags)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8ffc54b6661f..43bc02dea80c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -872,12 +872,12 @@ out: /** * xs_nospace - handle transmit was incomplete * @req: pointer to RPC request + * @transport: pointer to struct sock_xprt * */ -static int xs_nospace(struct rpc_rqst *req) +static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) { - struct rpc_xprt *xprt = req->rq_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct rpc_xprt *xprt = &transport->xprt; struct sock *sk = transport->inet; int ret = -EAGAIN; @@ -891,25 +891,49 @@ static int xs_nospace(struct rpc_rqst *req) /* Don't race with disconnect */ if (xprt_connected(xprt)) { + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); + rcu_read_unlock(); + /* wait for more buffer space */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; xprt_wait_for_buffer_space(xprt); } else ret = -ENOTCONN; spin_unlock(&xprt->transport_lock); + return ret; +} - /* Race breaker in case memory is freed before above code is called */ - if (ret == -EAGAIN) { - struct socket_wq *wq; +static int xs_sock_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); - rcu_read_unlock(); + lock_sock(sk); + if (!sock_writeable(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); + return ret; +} - sk->sk_write_space(sk); - } +static int xs_stream_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; + + lock_sock(sk); + if (!sk_stream_memory_free(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); return ret; } @@ -996,7 +1020,7 @@ static int xs_local_send_request(struct rpc_rqst *req) case -ENOBUFS: break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", @@ -1068,7 +1092,7 @@ process_status: /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_sock_nospace(req); break; case -ENETUNREACH: case -ENOBUFS: @@ -1181,7 +1205,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; case -ECONNRESET: case -ECONNREFUSED: @@ -2939,9 +2963,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) } xprt_set_bound(xprt); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); - ret = ERR_PTR(xs_local_setup_socket(transport)); - if (ret) - goto out_err; break; default: ret = ERR_PTR(-EAFNOSUPPORT); diff --git a/net/tipc/link.c b/net/tipc/link.c index f25010261a9e..8f2ee71c63c6 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1953,15 +1953,18 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); u16 rcv_nxt = l->rcv_nxt; - u16 dlen = msg_data_sz(hdr); + u32 dlen = msg_data_sz(hdr), glen = 0; int mtyp = msg_type(hdr); bool reply = msg_probe(hdr); - u16 glen = 0; void *data; char *if_name; int rc = 0; trace_tipc_proto_rcv(skb, false, l->name); + + if (dlen > U16_MAX) + goto exit; + if (tipc_link_is_blocked(l) || !xmitq) goto exit; @@ -2063,7 +2066,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (glen != tipc_gap_ack_blks_sz(ga->gack_cnt)) ga = NULL; } - + if(glen > dlen) + break; tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 58708b4c7719..e7155a774300 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -457,6 +457,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, state->probing = false; /* Sanity check received domain record */ + if (new_member_cnt > MAX_MON_DOMAIN) + return; if (dlen < dom_rec_len(arrv_dom, 0)) return; if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 836e629e8f4a..661bc2551a0a 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -290,7 +290,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n", type, lower, node); } else { - pr_warn("Unrecognized name table message received\n"); + pr_warn_ratelimited("Unknown name table message received\n"); } return false; } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 66a65c2cdb23..c52083522b28 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -812,7 +812,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, list_for_each_entry(p, &sr->all_publ, all_publ) if (p->key == *last_key) break; - if (p->key != *last_key) + if (list_entry_is_head(p, &sr->all_publ, all_publ)) return -EPIPE; } else { p = list_first_entry(&sr->all_publ, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index fbbac9ba2862..d543c4556df2 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2698,7 +2698,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list) /* Try again later if dest link is congested */ if (tsk->cong_link_cnt) { - sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100)); + sk_reset_timer(sk, &sk->sk_timer, + jiffies + msecs_to_jiffies(100)); return; } /* Prepare SYN for retransmit */ @@ -3590,7 +3591,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, if (p->key == *last_publ) break; } - if (p->key != *last_publ) { + if (list_entry_is_head(p, &tsk->publications, binding_sock)) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the * consistence check to fail in the netlink callback diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 0f034c3bc37d..abb93f7343c5 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -470,11 +470,13 @@ handle_error: copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); copy = min_t(size_t, copy, (max_open_record_len - record->len)); - rc = tls_device_copy_data(page_address(pfrag->page) + - pfrag->offset, copy, msg_iter); - if (rc) - goto handle_error; - tls_append_frag(record, pfrag, copy); + if (copy) { + rc = tls_device_copy_data(page_address(pfrag->page) + + pfrag->offset, copy, msg_iter); + if (rc) + goto handle_error; + tls_append_frag(record, pfrag, copy); + } size -= copy; if (!size) { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 02821b914054..af3be9a29d6d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -512,7 +512,7 @@ static int tls_do_encryption(struct sock *sk, memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, prot->iv_size + prot->salt_size); - xor_iv_with_seq(prot->version, rec->iv_data, tls_ctx->tx.rec_seq); + xor_iv_with_seq(prot->version, rec->iv_data + iv_offset, tls_ctx->tx.rec_seq); sge->offset += prot->prepend_size; sge->length -= prot->prepend_size; @@ -1479,11 +1479,11 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, } if (prot->version == TLS_1_3_VERSION) memcpy(iv + iv_offset, tls_ctx->rx.iv, - crypto_aead_ivsize(ctx->aead_recv)); + prot->iv_size + prot->salt_size); else memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); - xor_iv_with_seq(prot->version, iv, tls_ctx->rx.rec_seq); + xor_iv_with_seq(prot->version, iv + iv_offset, tls_ctx->rx.rec_seq); /* Prepare AAD */ tls_make_aad(aad, rxm->full_len - prot->overhead_size + diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 12e2ddaf887f..d45d5366115a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -192,8 +192,11 @@ void wait_for_unix_gc(void) { /* If number of inflight sockets is insane, * force a garbage collect right now. + * Paired with the WRITE_ONCE() in unix_inflight(), + * unix_notinflight() and gc_in_progress(). */ - if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && + !READ_ONCE(gc_in_progress)) unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } @@ -213,7 +216,9 @@ void unix_gc(void) if (gc_in_progress) goto out; - gc_in_progress = true; + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, true); + /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. @@ -299,7 +304,10 @@ void unix_gc(void) /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); - gc_in_progress = false; + + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, false); + wake_up(&unix_gc_wait); out: diff --git a/net/unix/scm.c b/net/unix/scm.c index 8c40f2b32392..ce700b22ecce 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -59,7 +59,8 @@ void unix_inflight(struct user_struct *user, struct file *fp) } else { BUG_ON(list_empty(&u->link)); } - unix_tot_inflight++; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } user->unix_inflight++; spin_unlock(&unix_gc_lock); @@ -79,7 +80,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); - unix_tot_inflight--; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); } user->unix_inflight--; spin_unlock(&unix_gc_lock); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index bc8055f4571b..d60d7caacbf5 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1222,6 +1222,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); + vsock_remove_connected(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7633d6a74bc2..d3e2b97d5d05 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12320,6 +12320,9 @@ static int handle_nan_filter(struct nlattr *attr_filter, i = 0; nla_for_each_nested(attr, attr_filter, rem) { filter[i].filter = nla_memdup(attr, GFP_KERNEL); + if (!filter[i].filter) + goto err; + filter[i].len = nla_len(attr); i++; } @@ -12332,6 +12335,15 @@ static int handle_nan_filter(struct nlattr *attr_filter, } return 0; + +err: + i = 0; + nla_for_each_nested(attr, attr_filter, rem) { + kfree(filter[i].filter); + i++; + } + kfree(filter); + return -ENOMEM; } static int nl80211_nan_add_func(struct sk_buff *skb, @@ -16302,7 +16314,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev, wdev->chandef = *chandef; wdev->preset_chandef = *chandef; - if (wdev->iftype == NL80211_IFTYPE_STATION && + if ((wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && !WARN_ON(!wdev->current_bss)) cfg80211_update_assoc_bss_entry(wdev, chandef->chan); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 6cefaad3b7f8..6bb9437af28b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1457,11 +1457,13 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ + spin_lock_bh(&rdev->bss_lock); if (cfg80211_add_nontrans_list(non_tx_data->tx_bss, &res->pub)) { if (__cfg80211_unlink_bss(rdev, res)) rdev->bss_generation++; } + spin_unlock_bh(&rdev->bss_lock); } trace_cfg80211_return_bss(&res->pub); diff --git a/net/wireless/util.c b/net/wireless/util.c index e7b1100925a3..5089c935c369 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -991,6 +991,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, switch (otype) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: cfg80211_stop_ap(rdev, dev, true); break; case NL80211_IFTYPE_ADHOC: diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d8d603aa4887..c94aa587e0c9 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1767,10 +1767,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb) write_lock_bh(&x25_list_lock); - sk_for_each(s, &x25_list) - if (x25_sk(s)->neighbour == nb) + sk_for_each(s, &x25_list) { + if (x25_sk(s)->neighbour == nb) { + write_unlock_bh(&x25_list_lock); + lock_sock(s); x25_disconnect(s, ENETUNREACH, 0, 0); - + release_sock(s); + write_lock_bh(&x25_list_lock); + } + } write_unlock_bh(&x25_list_lock); /* Remove any related forwards */ diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index bb2292b5260c..d758e9ec3d00 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -206,6 +206,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (x->encap || x->tfcpad) return -EINVAL; + if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) + return -EINVAL; + dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { @@ -243,7 +246,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = dev; xso->num_exthdrs = 1; - xso->flags = xuo->flags; + /* Don't forward bit that is not implemented */ + xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6; err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 74e90d78c3b4..4cfa79e04e3d 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -300,7 +300,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + if (skb->len > 1280) + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + else + goto xmit; } else { if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) goto xmit; @@ -659,11 +662,16 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; struct xfrm_if *xi; int err; xfrmi_netlink_parms(data, &p); + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (xi) return -EEXIST; @@ -688,9 +696,14 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], { struct xfrm_if *xi = netdev_priv(dev); struct net *net = xi->net; - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; xfrmi_netlink_parms(data, &p); + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 32c816342797..3ecb77c58c44 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -31,8 +31,10 @@ #include <linux/if_tunnel.h> #include <net/dst.h> #include <net/flow.h> +#include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/ip.h> +#include <net/gre.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif @@ -3281,7 +3283,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) fl4->flowi4_proto = iph->protocol; fl4->daddr = reverse ? iph->saddr : iph->daddr; fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; + fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK; if (!ip_is_fragment(iph)) { switch (iph->protocol) { @@ -3443,6 +3445,26 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) } fl6->flowi6_proto = nexthdr; return; + case IPPROTO_GRE: + if (!onlyproto && + (nh + offset + 12 < skb->data || + pskb_may_pull(skb, nh + offset + 12 - skb->data))) { + struct gre_base_hdr *gre_hdr; + __be32 *gre_key; + + nh = skb_network_header(skb); + gre_hdr = (struct gre_base_hdr *)(nh + offset); + gre_key = (__be32 *)(gre_hdr + 1); + + if (gre_hdr->flags & GRE_KEY) { + if (gre_hdr->flags & GRE_CSUM) + gre_key++; + fl6->fl6_gre_key = *gre_key; + } + } + fl6->flowi6_proto = nexthdr; + return; + #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: offset += ipv6_optlen(exthdr); @@ -4249,7 +4271,7 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, } static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, - u8 dir, u8 type, struct net *net) + u8 dir, u8 type, struct net *net, u32 if_id) { struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; @@ -4258,7 +4280,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { - if (xfrm_migrate_selector_match(sel, &pol->selector) && + if ((if_id == 0 || pol->if_id == if_id) && + xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; priority = ret->priority; @@ -4270,7 +4293,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * if ((pol->priority >= priority) && ret) break; - if (xfrm_migrate_selector_match(sel, &pol->selector) && + if ((if_id == 0 || pol->if_id == if_id) && + xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; break; @@ -4386,7 +4410,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap) + struct xfrm_encap_tmpl *encap, u32 if_id) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -4405,14 +4429,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 1 - find policy */ - if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { + if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) { err = -ENOENT; goto out; } /* Stage 2 - find and update state(s) */ for (i = 0, mp = m; i < num_migrate; i++, mp++) { - if ((x = xfrm_migrate_state_find(mp, net))) { + if ((x = xfrm_migrate_state_find(mp, net, if_id))) { x_cur[nx_cur] = x; nx_cur++; xc = xfrm_state_migrate(x, mp, encap); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index c6b2c99b501b..268bba29bb60 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1539,9 +1539,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, memcpy(&x->mark, &orig->mark, sizeof(x->mark)); memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark)); - if (xfrm_init_state(x) < 0) - goto error; - x->props.flags = orig->props.flags; x->props.extra_flags = orig->props.extra_flags; @@ -1563,7 +1560,8 @@ out: return NULL; } -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net) +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, + u32 if_id) { unsigned int h; struct xfrm_state *x = NULL; @@ -1579,6 +1577,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n continue; if (m->reqid && x->props.reqid != m->reqid) continue; + if (if_id != 0 && x->if_id != if_id) + continue; if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, m->old_family) || !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, @@ -1594,6 +1594,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n if (x->props.mode != m->mode || x->id.proto != m->proto) continue; + if (if_id != 0 && x->if_id != if_id) + continue; if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, m->old_family) || !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, @@ -1620,6 +1622,11 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, if (!xc) return NULL; + xc->props.family = m->new_family; + + if (xfrm_init_state(xc) < 0) + goto error; + memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr)); memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); @@ -2440,7 +2447,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); -u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) { const struct xfrm_type *type = READ_ONCE(x->type); struct crypto_aead *aead; @@ -2471,17 +2478,7 @@ u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - net_adj) & ~(blksize - 1)) + net_adj - 2; } -EXPORT_SYMBOL_GPL(__xfrm_state_mtu); - -u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) -{ - mtu = __xfrm_state_mtu(x, mtu); - - if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU) - return IPV6_MIN_MTU; - - return mtu; -} +EXPORT_SYMBOL_GPL(xfrm_state_mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0cee2d3c6e45..bd44a800e7db 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2374,6 +2374,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, int n = 0; struct net *net = sock_net(skb->sk); struct xfrm_encap_tmpl *encap = NULL; + u32 if_id = 0; if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; @@ -2398,7 +2399,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } - err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap); + if (attrs[XFRMA_IF_ID]) + if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + + err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id); kfree(encap); @@ -2816,7 +2820,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); if (x->xso.dev) - l += nla_total_size(sizeof(x->xso)); + l += nla_total_size(sizeof(struct xfrm_user_offload)); if (x->props.smark.v | x->props.smark.m) { l += nla_total_size(sizeof(x->props.smark.v)); l += nla_total_size(sizeof(x->props.smark.m)); |