summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c72
-rw-r--r--net/core/dev.c371
-rw-r--r--net/core/fib_rules.c25
-rw-r--r--net/core/flow_dissector.c17
-rw-r--r--net/core/iovec.c24
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/net-sysfs.c165
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netprio_cgroup.c72
-rw-r--r--net/core/pktgen.c61
-rw-r--r--net/core/rtnetlink.c29
-rw-r--r--net/core/scm.c6
-rw-r--r--net/core/skbuff.c19
-rw-r--r--net/core/sock.c166
-rw-r--r--net/core/stream.c2
-rw-r--r--net/core/sysctl_net_core.c30
16 files changed, 817 insertions, 246 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 8ab48cd89559..af814e764206 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -48,6 +48,7 @@
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
+#include <linux/pagemap.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
@@ -573,6 +574,77 @@ fault:
}
EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
+/**
+ * zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec
+ * @skb: buffer to copy
+ * @from: io vector to copy to
+ * @offset: offset in the io vector to start copying from
+ * @count: amount of vectors to copy to buffer from
+ *
+ * The function will first copy up to headlen, and then pin the userspace
+ * pages and build frags through them.
+ *
+ * Returns 0, -EFAULT or -EMSGSIZE.
+ * Note: the iovec is not modified during the copy
+ */
+int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
+ int offset, size_t count)
+{
+ int len = iov_length(from, count) - offset;
+ int copy = min_t(int, skb_headlen(skb), len);
+ int size;
+ int i = 0;
+
+ /* copy up to skb headlen */
+ if (skb_copy_datagram_from_iovec(skb, 0, from, offset, copy))
+ return -EFAULT;
+
+ if (len == copy)
+ return 0;
+
+ offset += copy;
+ while (count--) {
+ struct page *page[MAX_SKB_FRAGS];
+ int num_pages;
+ unsigned long base;
+ unsigned long truesize;
+
+ /* Skip over from offset and copied */
+ if (offset >= from->iov_len) {
+ offset -= from->iov_len;
+ ++from;
+ continue;
+ }
+ len = from->iov_len - offset;
+ base = (unsigned long)from->iov_base + offset;
+ size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+ if (i + size > MAX_SKB_FRAGS)
+ return -EMSGSIZE;
+ num_pages = get_user_pages_fast(base, size, 0, &page[i]);
+ if (num_pages != size) {
+ release_pages(&page[i], num_pages, 0);
+ return -EFAULT;
+ }
+ truesize = size * PAGE_SIZE;
+ skb->data_len += len;
+ skb->len += len;
+ skb->truesize += truesize;
+ atomic_add(truesize, &skb->sk->sk_wmem_alloc);
+ while (len) {
+ int off = base & ~PAGE_MASK;
+ int size = min_t(int, len, PAGE_SIZE - off);
+ skb_fill_page_desc(skb, i, page[i], off, size);
+ base += size;
+ len -= size;
+ i++;
+ }
+ offset = 0;
+ ++from;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(zerocopy_sg_from_iovec);
+
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
u8 __user *to, int len,
__wsum *csump)
diff --git a/net/core/dev.c b/net/core/dev.c
index 26755dd40daa..5c713f2239cc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -174,7 +174,7 @@ static DEFINE_SPINLOCK(napi_hash_lock);
static unsigned int napi_gen_id;
static DEFINE_HASHTABLE(napi_hash, 8);
-seqcount_t devnet_rename_seq;
+static seqcount_t devnet_rename_seq;
static inline void dev_base_seq_inc(struct net *net)
{
@@ -1691,13 +1691,13 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
kfree_skb(skb);
return NET_RX_DROP;
}
- skb_scrub_packet(skb);
skb->protocol = eth_type_trans(skb, dev);
/* eth_type_trans() can set pkt_type.
- * clear pkt_type _after_ calling eth_type_trans()
+ * call skb_scrub_packet() after it to clear pkt_type _after_ calling
+ * eth_type_trans().
*/
- skb->pkt_type = PACKET_HOST;
+ skb_scrub_packet(skb, true);
return netif_rx(skb);
}
@@ -4367,57 +4367,48 @@ softnet_break:
goto out;
}
-struct netdev_upper {
+struct netdev_adjacent {
struct net_device *dev;
+
+ /* upper master flag, there can only be one master device per list */
bool master;
+
+ /* indicates that this dev is our first-level lower/upper device */
+ bool neighbour;
+
+ /* counter for the number of times this device was added to us */
+ u16 ref_nr;
+
struct list_head list;
struct rcu_head rcu;
- struct list_head search_list;
};
-static void __append_search_uppers(struct list_head *search_list,
- struct net_device *dev)
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
+ struct net_device *adj_dev,
+ bool upper)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *adj;
+ struct list_head *dev_list;
- list_for_each_entry(upper, &dev->upper_dev_list, list) {
- /* check if this upper is not already in search list */
- if (list_empty(&upper->search_list))
- list_add_tail(&upper->search_list, search_list);
+ dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
+
+ list_for_each_entry(adj, dev_list, list) {
+ if (adj->dev == adj_dev)
+ return adj;
}
+ return NULL;
}
-static bool __netdev_search_upper_dev(struct net_device *dev,
- struct net_device *upper_dev)
+static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
+ struct net_device *udev)
{
- LIST_HEAD(search_list);
- struct netdev_upper *upper;
- struct netdev_upper *tmp;
- bool ret = false;
-
- __append_search_uppers(&search_list, dev);
- list_for_each_entry(upper, &search_list, search_list) {
- if (upper->dev == upper_dev) {
- ret = true;
- break;
- }
- __append_search_uppers(&search_list, upper->dev);
- }
- list_for_each_entry_safe(upper, tmp, &search_list, search_list)
- INIT_LIST_HEAD(&upper->search_list);
- return ret;
+ return __netdev_find_adj(dev, udev, true);
}
-static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
- struct net_device *upper_dev)
+static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
+ struct net_device *ldev)
{
- struct netdev_upper *upper;
-
- list_for_each_entry(upper, &dev->upper_dev_list, list) {
- if (upper->dev == upper_dev)
- return upper;
- }
- return NULL;
+ return __netdev_find_adj(dev, ldev, false);
}
/**
@@ -4462,7 +4453,7 @@ EXPORT_SYMBOL(netdev_has_any_upper_dev);
*/
struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *upper;
ASSERT_RTNL();
@@ -4470,13 +4461,38 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
return NULL;
upper = list_first_entry(&dev->upper_dev_list,
- struct netdev_upper, list);
+ struct netdev_adjacent, list);
if (likely(upper->master))
return upper->dev;
return NULL;
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
+/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next device from the dev's upper list, starting from iter
+ * position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *upper;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+ if (&upper->list == &dev->upper_dev_list)
+ return NULL;
+
+ *iter = &upper->list;
+
+ return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
/**
* netdev_master_upper_dev_get_rcu - Get master upper device
* @dev: device
@@ -4486,20 +4502,158 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get);
*/
struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *upper;
upper = list_first_or_null_rcu(&dev->upper_dev_list,
- struct netdev_upper, list);
+ struct netdev_adjacent, list);
if (upper && likely(upper->master))
return upper->dev;
return NULL;
}
EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
+static int __netdev_adjacent_dev_insert(struct net_device *dev,
+ struct net_device *adj_dev,
+ bool neighbour, bool master,
+ bool upper)
+{
+ struct netdev_adjacent *adj;
+
+ adj = __netdev_find_adj(dev, adj_dev, upper);
+
+ if (adj) {
+ BUG_ON(neighbour);
+ adj->ref_nr++;
+ return 0;
+ }
+
+ adj = kmalloc(sizeof(*adj), GFP_KERNEL);
+ if (!adj)
+ return -ENOMEM;
+
+ adj->dev = adj_dev;
+ adj->master = master;
+ adj->neighbour = neighbour;
+ adj->ref_nr = 1;
+
+ dev_hold(adj_dev);
+ pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
+ adj_dev->name, upper ? "upper" : "lower", dev->name,
+ adj_dev->name);
+
+ if (!upper) {
+ list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
+ return 0;
+ }
+
+ /* Ensure that master upper link is always the first item in list. */
+ if (master)
+ list_add_rcu(&adj->list, &dev->upper_dev_list);
+ else
+ list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
+
+ return 0;
+}
+
+static inline int __netdev_upper_dev_insert(struct net_device *dev,
+ struct net_device *udev,
+ bool master, bool neighbour)
+{
+ return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
+ true);
+}
+
+static inline int __netdev_lower_dev_insert(struct net_device *dev,
+ struct net_device *ldev,
+ bool neighbour)
+{
+ return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
+ false);
+}
+
+void __netdev_adjacent_dev_remove(struct net_device *dev,
+ struct net_device *adj_dev, bool upper)
+{
+ struct netdev_adjacent *adj;
+
+ if (upper)
+ adj = __netdev_find_upper(dev, adj_dev);
+ else
+ adj = __netdev_find_lower(dev, adj_dev);
+
+ if (!adj)
+ BUG();
+
+ if (adj->ref_nr > 1) {
+ adj->ref_nr--;
+ return;
+ }
+
+ list_del_rcu(&adj->list);
+ pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
+ adj_dev->name, upper ? "upper" : "lower", dev->name,
+ adj_dev->name);
+ dev_put(adj_dev);
+ kfree_rcu(adj, rcu);
+}
+
+static inline void __netdev_upper_dev_remove(struct net_device *dev,
+ struct net_device *udev)
+{
+ return __netdev_adjacent_dev_remove(dev, udev, true);
+}
+
+static inline void __netdev_lower_dev_remove(struct net_device *dev,
+ struct net_device *ldev)
+{
+ return __netdev_adjacent_dev_remove(dev, ldev, false);
+}
+
+int __netdev_adjacent_dev_insert_link(struct net_device *dev,
+ struct net_device *upper_dev,
+ bool master, bool neighbour)
+{
+ int ret;
+
+ ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
+ if (ret)
+ return ret;
+
+ ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
+ if (ret) {
+ __netdev_upper_dev_remove(dev, upper_dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static inline int __netdev_adjacent_dev_link(struct net_device *dev,
+ struct net_device *udev)
+{
+ return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
+}
+
+static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+ struct net_device *udev,
+ bool master)
+{
+ return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
+}
+
+void __netdev_adjacent_dev_unlink(struct net_device *dev,
+ struct net_device *upper_dev)
+{
+ __netdev_upper_dev_remove(dev, upper_dev);
+ __netdev_lower_dev_remove(upper_dev, dev);
+}
+
+
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *i, *j, *to_i, *to_j;
+ int ret = 0;
ASSERT_RTNL();
@@ -4507,7 +4661,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_search_upper_dev(upper_dev, dev))
+ if (__netdev_find_upper(upper_dev, dev))
return -EBUSY;
if (__netdev_find_upper(dev, upper_dev))
@@ -4516,22 +4670,76 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- upper = kmalloc(sizeof(*upper), GFP_KERNEL);
- if (!upper)
- return -ENOMEM;
+ ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+ if (ret)
+ return ret;
- upper->dev = upper_dev;
- upper->master = master;
- INIT_LIST_HEAD(&upper->search_list);
+ /* Now that we linked these devs, make all the upper_dev's
+ * upper_dev_list visible to every dev's lower_dev_list and vice
+ * versa, and don't forget the devices itself. All of these
+ * links are non-neighbours.
+ */
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+ if (ret)
+ goto rollback_mesh;
+ }
+ }
+
+ /* add dev to every upper_dev's upper device */
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(dev, i->dev);
+ if (ret)
+ goto rollback_upper_mesh;
+ }
+
+ /* add upper_dev to every dev's lower device */
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+ if (ret)
+ goto rollback_lower_mesh;
+ }
- /* Ensure that master upper link is always the first item in list. */
- if (master)
- list_add_rcu(&upper->list, &dev->upper_dev_list);
- else
- list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
- dev_hold(upper_dev);
call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
return 0;
+
+rollback_lower_mesh:
+ to_i = i;
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ if (i == to_i)
+ break;
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+ }
+
+ i = NULL;
+
+rollback_upper_mesh:
+ to_i = i;
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ if (i == to_i)
+ break;
+ __netdev_adjacent_dev_unlink(dev, i->dev);
+ }
+
+ i = j = NULL;
+
+rollback_mesh:
+ to_i = i;
+ to_j = j;
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+ if (i == to_i && j == to_j)
+ break;
+ __netdev_adjacent_dev_unlink(i->dev, j->dev);
+ }
+ if (i == to_i)
+ break;
+ }
+
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+
+ return ret;
}
/**
@@ -4580,16 +4788,28 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_upper *upper;
-
+ struct netdev_adjacent *i, *j;
ASSERT_RTNL();
- upper = __netdev_find_upper(dev, upper_dev);
- if (!upper)
- return;
- list_del_rcu(&upper->list);
- dev_put(upper_dev);
- kfree_rcu(upper, rcu);
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+
+ /* Here is the tricky part. We must remove all dev's lower
+ * devices from all upper_dev's upper devices and vice
+ * versa, to maintain the graph relationship.
+ */
+ list_for_each_entry(i, &dev->lower_dev_list, list)
+ list_for_each_entry(j, &upper_dev->upper_dev_list, list)
+ __netdev_adjacent_dev_unlink(i->dev, j->dev);
+
+ /* remove also the devices itself from lower/upper device
+ * list
+ */
+ list_for_each_entry(i, &dev->lower_dev_list, list)
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list)
+ __netdev_adjacent_dev_unlink(dev, i->dev);
+
call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -4989,6 +5209,24 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier)
EXPORT_SYMBOL(dev_change_carrier);
/**
+ * dev_get_phys_port_id - Get device physical port ID
+ * @dev: device
+ * @ppid: port ID
+ *
+ * Get device physical port ID
+ */
+int dev_get_phys_port_id(struct net_device *dev,
+ struct netdev_phys_port_id *ppid)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_get_phys_port_id)
+ return -EOPNOTSUPP;
+ return ops->ndo_get_phys_port_id(dev, ppid);
+}
+EXPORT_SYMBOL(dev_get_phys_port_id);
+
+/**
* dev_new_index - allocate an ifindex
* @net: the applicable net namespace
*
@@ -5832,6 +6070,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->unreg_list);
INIT_LIST_HEAD(&dev->link_watch_list);
INIT_LIST_HEAD(&dev->upper_dev_list);
+ INIT_LIST_HEAD(&dev->lower_dev_list);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 21735440c44a..2e654138433c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,9 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->flags = flags;
r->fr_net = hold_net(ops->fro_net);
+ r->suppress_prefixlen = -1;
+ r->suppress_ifgroup = -1;
+
/* The lock is not required here, the list in unreacheable
* at the moment this function is called */
list_add_tail(&r->list, &ops->rules_list);
@@ -226,6 +229,9 @@ jumped:
else
err = ops->action(rule, fl, flags, arg);
+ if (!err && ops->suppress && ops->suppress(rule, arg))
+ continue;
+
if (err != -EAGAIN) {
if ((arg->flags & FIB_LOOKUP_NOREF) ||
likely(atomic_inc_not_zero(&rule->refcnt))) {
@@ -337,6 +343,15 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
rule->action = frh->action;
rule->flags = frh->flags;
rule->table = frh_get_table(frh, tb);
+ if (tb[FRA_SUPPRESS_PREFIXLEN])
+ rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
+ else
+ rule->suppress_prefixlen = -1;
+
+ if (tb[FRA_SUPPRESS_IFGROUP])
+ rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
+ else
+ rule->suppress_ifgroup = -1;
if (!tb[FRA_PRIORITY] && ops->default_pref)
rule->pref = ops->default_pref(ops);
@@ -523,6 +538,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
+ nla_total_size(4) /* FRA_PRIORITY */
+ nla_total_size(4) /* FRA_TABLE */
+ + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
+ + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4); /* FRA_FWMASK */
@@ -548,6 +565,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->table = rule->table;
if (nla_put_u32(skb, FRA_TABLE, rule->table))
goto nla_put_failure;
+ if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
+ goto nla_put_failure;
frh->res1 = 0;
frh->res2 = 0;
frh->action = rule->action;
@@ -580,6 +599,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->target &&
nla_put_u32(skb, FRA_GOTO, rule->target)))
goto nla_put_failure;
+
+ if (rule->suppress_ifgroup != -1) {
+ if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup))
+ goto nla_put_failure;
+ }
+
if (ops->fill(rule, skb, frh) < 0)
goto nla_put_failure;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index b84a1b155bc1..1929af87b260 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -140,7 +140,11 @@ ipv6:
break;
}
case IPPROTO_IPIP:
- goto again;
+ proto = htons(ETH_P_IP);
+ goto ip;
+ case IPPROTO_IPV6:
+ proto = htons(ETH_P_IPV6);
+ goto ipv6;
default:
break;
}
@@ -346,14 +350,9 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
if (new_index < 0)
new_index = skb_tx_hash(dev, skb);
- if (queue_index != new_index && sk) {
- struct dst_entry *dst =
- rcu_dereference_check(sk->sk_dst_cache, 1);
-
- if (dst && skb_dst(skb) == dst)
- sk_tx_queue_set(sk, queue_index);
-
- }
+ if (queue_index != new_index && sk &&
+ rcu_access_pointer(sk->sk_dst_cache))
+ sk_tx_queue_set(sk, new_index);
queue_index = new_index;
}
diff --git a/net/core/iovec.c b/net/core/iovec.c
index de178e462682..b77eeecc0011 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -212,3 +212,27 @@ out_fault:
goto out;
}
EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
+
+unsigned long iov_pages(const struct iovec *iov, int offset,
+ unsigned long nr_segs)
+{
+ unsigned long seg, base;
+ int pages = 0, len, size;
+
+ while (nr_segs && (offset >= iov->iov_len)) {
+ offset -= iov->iov_len;
+ ++iov;
+ --nr_segs;
+ }
+
+ for (seg = 0; seg < nr_segs; seg++) {
+ base = (unsigned long)iov[seg].iov_base + offset;
+ len = iov[seg].iov_len - offset;
+ size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+ pages += size;
+ offset = 0;
+ }
+
+ return pages;
+}
+EXPORT_SYMBOL(iov_pages);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 60533db8b72d..6072610a8672 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2759,13 +2759,11 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
-#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
-#endif /* CONFIG_ARPD */
#ifdef CONFIG_SYSCTL
static int zero;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 981fed397d1d..d954b56b4e47 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -60,12 +60,19 @@ static ssize_t format_##field(const struct net_device *net, char *buf) \
{ \
return sprintf(buf, format_string, net->field); \
} \
-static ssize_t show_##field(struct device *dev, \
+static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
return netdev_show(dev, attr, buf, format_##field); \
-}
+} \
+
+#define NETDEVICE_SHOW_RO(field, format_string) \
+NETDEVICE_SHOW(field, format_string); \
+static DEVICE_ATTR_RO(field)
+#define NETDEVICE_SHOW_RW(field, format_string) \
+NETDEVICE_SHOW(field, format_string); \
+static DEVICE_ATTR_RW(field)
/* use same locking and permission rules as SIF* ioctl's */
static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
@@ -96,16 +103,16 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
return ret;
}
-NETDEVICE_SHOW(dev_id, fmt_hex);
-NETDEVICE_SHOW(addr_assign_type, fmt_dec);
-NETDEVICE_SHOW(addr_len, fmt_dec);
-NETDEVICE_SHOW(iflink, fmt_dec);
-NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(type, fmt_dec);
-NETDEVICE_SHOW(link_mode, fmt_dec);
+NETDEVICE_SHOW_RO(dev_id, fmt_hex);
+NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
+NETDEVICE_SHOW_RO(addr_len, fmt_dec);
+NETDEVICE_SHOW_RO(iflink, fmt_dec);
+NETDEVICE_SHOW_RO(ifindex, fmt_dec);
+NETDEVICE_SHOW_RO(type, fmt_dec);
+NETDEVICE_SHOW_RO(link_mode, fmt_dec);
/* use same locking rules as GIFHWADDR ioctl's */
-static ssize_t show_address(struct device *dev, struct device_attribute *attr,
+static ssize_t address_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct net_device *net = to_net_dev(dev);
@@ -117,15 +124,17 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr,
read_unlock(&dev_base_lock);
return ret;
}
+static DEVICE_ATTR_RO(address);
-static ssize_t show_broadcast(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t broadcast_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct net_device *net = to_net_dev(dev);
if (dev_isalive(net))
return sysfs_format_mac(buf, net->broadcast, net->addr_len);
return -EINVAL;
}
+static DEVICE_ATTR_RO(broadcast);
static int change_carrier(struct net_device *net, unsigned long new_carrier)
{
@@ -134,13 +143,13 @@ static int change_carrier(struct net_device *net, unsigned long new_carrier)
return dev_change_carrier(net, (bool) new_carrier);
}
-static ssize_t store_carrier(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t len)
+static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_carrier);
}
-static ssize_t show_carrier(struct device *dev,
+static ssize_t carrier_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
@@ -149,8 +158,9 @@ static ssize_t show_carrier(struct device *dev,
}
return -EINVAL;
}
+static DEVICE_ATTR_RW(carrier);
-static ssize_t show_speed(struct device *dev,
+static ssize_t speed_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
@@ -167,8 +177,9 @@ static ssize_t show_speed(struct device *dev,
rtnl_unlock();
return ret;
}
+static DEVICE_ATTR_RO(speed);
-static ssize_t show_duplex(struct device *dev,
+static ssize_t duplex_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
@@ -198,8 +209,9 @@ static ssize_t show_duplex(struct device *dev,
rtnl_unlock();
return ret;
}
+static DEVICE_ATTR_RO(duplex);
-static ssize_t show_dormant(struct device *dev,
+static ssize_t dormant_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
@@ -209,6 +221,7 @@ static ssize_t show_dormant(struct device *dev,
return -EINVAL;
}
+static DEVICE_ATTR_RO(dormant);
static const char *const operstates[] = {
"unknown",
@@ -220,7 +233,7 @@ static const char *const operstates[] = {
"up"
};
-static ssize_t show_operstate(struct device *dev,
+static ssize_t operstate_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct net_device *netdev = to_net_dev(dev);
@@ -237,35 +250,33 @@ static ssize_t show_operstate(struct device *dev,
return sprintf(buf, "%s\n", operstates[operstate]);
}
+static DEVICE_ATTR_RO(operstate);
/* read-write attributes */
-NETDEVICE_SHOW(mtu, fmt_dec);
static int change_mtu(struct net_device *net, unsigned long new_mtu)
{
return dev_set_mtu(net, (int) new_mtu);
}
-static ssize_t store_mtu(struct device *dev, struct device_attribute *attr,
+static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_mtu);
}
-
-NETDEVICE_SHOW(flags, fmt_hex);
+NETDEVICE_SHOW_RW(mtu, fmt_dec);
static int change_flags(struct net_device *net, unsigned long new_flags)
{
return dev_change_flags(net, (unsigned int) new_flags);
}
-static ssize_t store_flags(struct device *dev, struct device_attribute *attr,
+static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_flags);
}
-
-NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
+NETDEVICE_SHOW_RW(flags, fmt_hex);
static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
{
@@ -273,7 +284,7 @@ static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
return 0;
}
-static ssize_t store_tx_queue_len(struct device *dev,
+static ssize_t tx_queue_len_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
@@ -282,8 +293,9 @@ static ssize_t store_tx_queue_len(struct device *dev,
return netdev_store(dev, attr, buf, len, change_tx_queue_len);
}
+NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong);
-static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
+static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
struct net_device *netdev = to_net_dev(dev);
@@ -306,7 +318,7 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
return ret < 0 ? ret : len;
}
-static ssize_t show_ifalias(struct device *dev,
+static ssize_t ifalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct net_device *netdev = to_net_dev(dev);
@@ -319,8 +331,7 @@ static ssize_t show_ifalias(struct device *dev,
rtnl_unlock();
return ret;
}
-
-NETDEVICE_SHOW(group, fmt_dec);
+static DEVICE_ATTR_RW(ifalias);
static int change_group(struct net_device *net, unsigned long new_group)
{
@@ -328,35 +339,60 @@ static int change_group(struct net_device *net, unsigned long new_group)
return 0;
}
-static ssize_t store_group(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t len)
+static ssize_t group_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_group);
}
+NETDEVICE_SHOW(group, fmt_dec);
+static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
+
+static ssize_t phys_port_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
-static struct device_attribute net_class_attributes[] = {
- __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
- __ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
- __ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
- __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
- __ATTR(iflink, S_IRUGO, show_iflink, NULL),
- __ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
- __ATTR(type, S_IRUGO, show_type, NULL),
- __ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
- __ATTR(address, S_IRUGO, show_address, NULL),
- __ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
- __ATTR(carrier, S_IRUGO | S_IWUSR, show_carrier, store_carrier),
- __ATTR(speed, S_IRUGO, show_speed, NULL),
- __ATTR(duplex, S_IRUGO, show_duplex, NULL),
- __ATTR(dormant, S_IRUGO, show_dormant, NULL),
- __ATTR(operstate, S_IRUGO, show_operstate, NULL),
- __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
- __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
- __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
- store_tx_queue_len),
- __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group),
- {}
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (dev_isalive(netdev)) {
+ struct netdev_phys_port_id ppid;
+
+ ret = dev_get_phys_port_id(netdev, &ppid);
+ if (!ret)
+ ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
+ }
+ rtnl_unlock();
+
+ return ret;
+}
+static DEVICE_ATTR_RO(phys_port_id);
+
+static struct attribute *net_class_attrs[] = {
+ &dev_attr_netdev_group.attr,
+ &dev_attr_type.attr,
+ &dev_attr_dev_id.attr,
+ &dev_attr_iflink.attr,
+ &dev_attr_ifindex.attr,
+ &dev_attr_addr_assign_type.attr,
+ &dev_attr_addr_len.attr,
+ &dev_attr_link_mode.attr,
+ &dev_attr_address.attr,
+ &dev_attr_broadcast.attr,
+ &dev_attr_speed.attr,
+ &dev_attr_duplex.attr,
+ &dev_attr_dormant.attr,
+ &dev_attr_operstate.attr,
+ &dev_attr_ifalias.attr,
+ &dev_attr_carrier.attr,
+ &dev_attr_mtu.attr,
+ &dev_attr_flags.attr,
+ &dev_attr_tx_queue_len.attr,
+ &dev_attr_phys_port_id.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(net_class);
/* Show a given an attribute in the statistics group */
static ssize_t netstat_show(const struct device *d,
@@ -382,13 +418,13 @@ static ssize_t netstat_show(const struct device *d,
/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name) \
-static ssize_t show_##name(struct device *d, \
+static ssize_t name##_show(struct device *d, \
struct device_attribute *attr, char *buf) \
{ \
return netstat_show(d, attr, buf, \
offsetof(struct rtnl_link_stats64, name)); \
} \
-static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+static DEVICE_ATTR_RO(name)
NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
@@ -457,6 +493,9 @@ static struct attribute_group wireless_group = {
.attrs = wireless_attrs,
};
#endif
+
+#else /* CONFIG_SYSFS */
+#define net_class_groups NULL
#endif /* CONFIG_SYSFS */
#ifdef CONFIG_RPS
@@ -1157,6 +1196,13 @@ static void remove_queue_kobjects(struct net_device *net)
#endif
}
+static bool net_current_may_mount(void)
+{
+ struct net *net = current->nsproxy->net_ns;
+
+ return ns_capable(net->user_ns, CAP_SYS_ADMIN);
+}
+
static void *net_grab_current_ns(void)
{
struct net *ns = current->nsproxy->net_ns;
@@ -1179,6 +1225,7 @@ static const void *net_netlink_ns(struct sock *sk)
struct kobj_ns_type_operations net_ns_type_operations = {
.type = KOBJ_NS_TYPE_NET,
+ .current_may_mount = net_current_may_mount,
.grab_current_ns = net_grab_current_ns,
.netlink_ns = net_netlink_ns,
.initial_ns = net_initial_ns,
@@ -1229,9 +1276,7 @@ static const void *net_namespace(struct device *d)
static struct class net_class = {
.name = "net",
.dev_release = netdev_release,
-#ifdef CONFIG_SYSFS
- .dev_attrs = net_class_attributes,
-#endif /* CONFIG_SYSFS */
+ .dev_groups = net_class_groups,
.dev_uevent = netdev_uevent,
.ns_type = &net_ns_type_operations,
.namespace = net_namespace,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f97652036754..81d3a9a08453 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -651,7 +651,7 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
struct net *net = ns;
if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
- !nsown_capable(CAP_SYS_ADMIN))
+ !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
put_net(nsproxy->net_ns);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index e533259dce3c..d9cd627e6a16 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -29,12 +29,6 @@
#define PRIOMAP_MIN_SZ 128
-static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
-{
- return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
- struct cgroup_netprio_state, css);
-}
-
/*
* Extend @dev->priomap so that it's large enough to accomodate
* @target_idx. @dev->priomap.priomap_len > @target_idx after successful
@@ -87,67 +81,70 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx)
/**
* netprio_prio - return the effective netprio of a cgroup-net_device pair
- * @cgrp: cgroup part of the target pair
+ * @css: css part of the target pair
* @dev: net_device part of the target pair
*
* Should be called under RCU read or rtnl lock.
*/
-static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev)
+static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev)
{
struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
+ int id = css->cgroup->id;
- if (map && cgrp->id < map->priomap_len)
- return map->priomap[cgrp->id];
+ if (map && id < map->priomap_len)
+ return map->priomap[id];
return 0;
}
/**
* netprio_set_prio - set netprio on a cgroup-net_device pair
- * @cgrp: cgroup part of the target pair
+ * @css: css part of the target pair
* @dev: net_device part of the target pair
* @prio: prio to set
*
- * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl
+ * Set netprio to @prio on @css-@dev pair. Should be called under rtnl
* lock and may fail under memory pressure for non-zero @prio.
*/
-static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev,
- u32 prio)
+static int netprio_set_prio(struct cgroup_subsys_state *css,
+ struct net_device *dev, u32 prio)
{
struct netprio_map *map;
+ int id = css->cgroup->id;
int ret;
/* avoid extending priomap for zero writes */
map = rtnl_dereference(dev->priomap);
- if (!prio && (!map || map->priomap_len <= cgrp->id))
+ if (!prio && (!map || map->priomap_len <= id))
return 0;
- ret = extend_netdev_table(dev, cgrp->id);
+ ret = extend_netdev_table(dev, id);
if (ret)
return ret;
map = rtnl_dereference(dev->priomap);
- map->priomap[cgrp->id] = prio;
+ map->priomap[id] = prio;
return 0;
}
-static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
{
- struct cgroup_netprio_state *cs;
+ struct cgroup_subsys_state *css;
- cs = kzalloc(sizeof(*cs), GFP_KERNEL);
- if (!cs)
+ css = kzalloc(sizeof(*css), GFP_KERNEL);
+ if (!css)
return ERR_PTR(-ENOMEM);
- return &cs->css;
+ return css;
}
-static int cgrp_css_online(struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup_subsys_state *css)
{
- struct cgroup *parent = cgrp->parent;
+ struct cgroup_subsys_state *parent_css = css_parent(css);
struct net_device *dev;
int ret = 0;
- if (!parent)
+ if (!parent_css)
return 0;
rtnl_lock();
@@ -156,9 +153,9 @@ static int cgrp_css_online(struct cgroup *cgrp)
* onlining, there is no need to clear them on offline.
*/
for_each_netdev(&init_net, dev) {
- u32 prio = netprio_prio(parent, dev);
+ u32 prio = netprio_prio(parent_css, dev);
- ret = netprio_set_prio(cgrp, dev, prio);
+ ret = netprio_set_prio(css, dev, prio);
if (ret)
break;
}
@@ -166,29 +163,29 @@ static int cgrp_css_online(struct cgroup *cgrp)
return ret;
}
-static void cgrp_css_free(struct cgroup *cgrp)
+static void cgrp_css_free(struct cgroup_subsys_state *css)
{
- kfree(cgrp_netprio_state(cgrp));
+ kfree(css);
}
-static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
{
- return cgrp->id;
+ return css->cgroup->id;
}
-static int read_priomap(struct cgroup *cont, struct cftype *cft,
+static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
struct cgroup_map_cb *cb)
{
struct net_device *dev;
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev)
- cb->fill(cb, dev->name, netprio_prio(cont, dev));
+ cb->fill(cb, dev->name, netprio_prio(css, dev));
rcu_read_unlock();
return 0;
}
-static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
+static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buffer)
{
char devname[IFNAMSIZ + 1];
@@ -205,7 +202,7 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
rtnl_lock();
- ret = netprio_set_prio(cgrp, dev, prio);
+ ret = netprio_set_prio(css, dev, prio);
rtnl_unlock();
dev_put(dev);
@@ -221,12 +218,13 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
return 0;
}
-static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
struct task_struct *p;
void *v;
- cgroup_taskset_for_each(p, cgrp, tset) {
+ cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
v = (void *)(unsigned long)task_netprioidx(p);
iterate_fd(p->files, 0, update_netprio, v);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9640972ec50e..261357a66300 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -160,6 +160,8 @@
#include <net/net_namespace.h>
#include <net/checksum.h>
#include <net/ipv6.h>
+#include <net/udp.h>
+#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#ifdef CONFIG_XFRM
#include <net/xfrm.h>
@@ -198,6 +200,7 @@
#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
#define F_NODE (1<<15) /* Node memory alloc*/
+#define F_UDPCSUM (1<<16) /* Include UDP checksum */
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -631,6 +634,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->flags & F_UDPDST_RND)
seq_printf(seq, "UDPDST_RND ");
+ if (pkt_dev->flags & F_UDPCSUM)
+ seq_printf(seq, "UDPCSUM ");
+
if (pkt_dev->flags & F_MPLS_RND)
seq_printf(seq, "MPLS_RND ");
@@ -1228,6 +1234,12 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "!NODE_ALLOC") == 0)
pkt_dev->flags &= ~F_NODE;
+ else if (strcmp(f, "UDPCSUM") == 0)
+ pkt_dev->flags |= F_UDPCSUM;
+
+ else if (strcmp(f, "!UDPCSUM") == 0)
+ pkt_dev->flags &= ~F_UDPCSUM;
+
else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
@@ -2733,7 +2745,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
udph->source = htons(pkt_dev->cur_udp_src);
udph->dest = htons(pkt_dev->cur_udp_dst);
udph->len = htons(datalen + 8); /* DATA + udphdr */
- udph->check = 0; /* No checksum */
+ udph->check = 0;
iph->ihl = 5;
iph->version = 4;
@@ -2747,11 +2759,28 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
iph->frag_off = 0;
iplen = 20 + 8 + datalen;
iph->tot_len = htons(iplen);
- iph->check = 0;
- iph->check = ip_fast_csum((void *)iph, iph->ihl);
+ ip_send_check(iph);
skb->protocol = protocol;
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
+
+ if (!(pkt_dev->flags & F_UDPCSUM)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ } else if (odev->features & NETIF_F_V4_CSUM) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum = 0;
+ udp4_hwcsum(skb, udph->source, udph->dest);
+ } else {
+ __wsum csum = udp_csum(skb);
+
+ /* add protocol-dependent pseudo-header */
+ udph->check = csum_tcpudp_magic(udph->source, udph->dest,
+ datalen + 8, IPPROTO_UDP, csum);
+
+ if (udph->check == 0)
+ udph->check = CSUM_MANGLED_0;
+ }
+
pktgen_finalize_skb(pkt_dev, skb, datalen);
#ifdef CONFIG_XFRM
@@ -2768,7 +2797,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
struct sk_buff *skb = NULL;
__u8 *eth;
struct udphdr *udph;
- int datalen;
+ int datalen, udplen;
struct ipv6hdr *iph;
__be16 protocol = htons(ETH_P_IPV6);
__be32 *mpls;
@@ -2844,10 +2873,11 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
net_info_ratelimited("increased datalen to %d\n", datalen);
}
+ udplen = datalen + sizeof(struct udphdr);
udph->source = htons(pkt_dev->cur_udp_src);
udph->dest = htons(pkt_dev->cur_udp_dst);
- udph->len = htons(datalen + sizeof(struct udphdr));
- udph->check = 0; /* No checksum */
+ udph->len = htons(udplen);
+ udph->check = 0;
*(__be32 *) iph = htonl(0x60000000); /* Version + flow */
@@ -2858,7 +2888,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
iph->hop_limit = 32;
- iph->payload_len = htons(sizeof(struct udphdr) + datalen);
+ iph->payload_len = htons(udplen);
iph->nexthdr = IPPROTO_UDP;
iph->daddr = pkt_dev->cur_in6_daddr;
@@ -2868,6 +2898,23 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
+ if (!(pkt_dev->flags & F_UDPCSUM)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ } else if (odev->features & NETIF_F_V6_CSUM) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ udph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, 0);
+ } else {
+ __wsum csum = udp_csum(skb);
+
+ /* add protocol-dependent pseudo-header */
+ udph->check = csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, csum);
+
+ if (udph->check == 0)
+ udph->check = CSUM_MANGLED_0;
+ }
+
pktgen_finalize_skb(pkt_dev, skb, datalen);
return skb;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ca198c1d1d30..2a0e21de3060 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -767,7 +767,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
+ rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
- + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
+ + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+ + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -846,6 +847,24 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
+{
+ int err;
+ struct netdev_phys_port_id ppid;
+
+ err = dev_get_phys_port_id(dev, &ppid);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ if (nla_put(skb, IFLA_PHYS_PORT_ID, ppid.id_len, ppid.id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask)
@@ -913,6 +932,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
+ if (rtnl_phys_port_id_fill(skb, dev))
+ goto nla_put_failure;
+
attr = nla_reserve(skb, IFLA_STATS,
sizeof(struct rtnl_link_stats));
if (attr == NULL)
@@ -1113,6 +1135,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROMISCUITY] = { .type = NLA_U32 },
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
+ [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
};
EXPORT_SYMBOL(ifla_policy);
@@ -1844,10 +1867,10 @@ replay:
else
err = register_netdevice(dev);
- if (err < 0 && !IS_ERR(dev))
+ if (err < 0) {
free_netdev(dev);
- if (err < 0)
goto out;
+ }
err = rtnl_configure_link(dev, ifm);
if (err < 0)
diff --git a/net/core/scm.c b/net/core/scm.c
index 03795d0147f2..b442e7e25e60 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -54,11 +54,11 @@ static __inline__ int scm_check_creds(struct ucred *creds)
return -EINVAL;
if ((creds->pid == task_tgid_vnr(current) ||
- ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) &&
+ ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) &&
((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
- uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
+ uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) &&
((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
- gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) {
+ gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) {
return 0;
}
return -EPERM;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2c3d0f53d198..d81cff119f73 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3500,17 +3500,22 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
EXPORT_SYMBOL(skb_try_coalesce);
/**
- * skb_scrub_packet - scrub an skb before sending it to another netns
+ * skb_scrub_packet - scrub an skb
*
* @skb: buffer to clean
- *
- * skb_scrub_packet can be used to clean an skb before injecting it in
- * another namespace. We have to clear all information in the skb that
- * could impact namespace isolation.
+ * @xnet: packet is crossing netns
+ *
+ * skb_scrub_packet can be used after encapsulating or decapsulting a packet
+ * into/from a tunnel. Some information have to be cleared during these
+ * operations.
+ * skb_scrub_packet can also be used to clean a skb before injecting it in
+ * another namespace (@xnet == true). We have to clear all information in the
+ * skb that could impact namespace isolation.
*/
-void skb_scrub_packet(struct sk_buff *skb)
+void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
- skb_orphan(skb);
+ if (xnet)
+ skb_orphan(skb);
skb->tstamp.tv64 = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 2c097c5a35dd..5b6beba494a3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -93,6 +93,7 @@
#include <linux/capability.h>
#include <linux/errno.h>
+#include <linux/errqueue.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
@@ -1575,6 +1576,25 @@ void sock_wfree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_wfree);
+void skb_orphan_partial(struct sk_buff *skb)
+{
+ /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
+ * so we do not completely orphan skb, but transfert all
+ * accounted bytes but one, to avoid unexpected reorders.
+ */
+ if (skb->destructor == sock_wfree
+#ifdef CONFIG_INET
+ || skb->destructor == tcp_wfree
+#endif
+ ) {
+ atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
+ skb->truesize = 1;
+ } else {
+ skb_orphan(skb);
+ }
+}
+EXPORT_SYMBOL(skb_orphan_partial);
+
/*
* Read buffer destructor automatically called from kfree_skb.
*/
@@ -1721,24 +1741,23 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
unsigned long data_len, int noblock,
- int *errcode)
+ int *errcode, int max_page_order)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
+ unsigned long chunk;
gfp_t gfp_mask;
long timeo;
int err;
int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+ struct page *page;
+ int i;
err = -EMSGSIZE;
if (npages > MAX_SKB_FRAGS)
goto failure;
- gfp_mask = sk->sk_allocation;
- if (gfp_mask & __GFP_WAIT)
- gfp_mask |= __GFP_REPEAT;
-
timeo = sock_sndtimeo(sk, noblock);
- while (1) {
+ while (!skb) {
err = sock_error(sk);
if (err != 0)
goto failure;
@@ -1747,50 +1766,52 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto failure;
- if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
- skb = alloc_skb(header_len, gfp_mask);
- if (skb) {
- int i;
-
- /* No pages, we're done... */
- if (!data_len)
- break;
-
- skb->truesize += data_len;
- skb_shinfo(skb)->nr_frags = npages;
- for (i = 0; i < npages; i++) {
- struct page *page;
-
- page = alloc_pages(sk->sk_allocation, 0);
- if (!page) {
- err = -ENOBUFS;
- skb_shinfo(skb)->nr_frags = i;
- kfree_skb(skb);
- goto failure;
- }
-
- __skb_fill_page_desc(skb, i,
- page, 0,
- (data_len >= PAGE_SIZE ?
- PAGE_SIZE :
- data_len));
- data_len -= PAGE_SIZE;
- }
+ if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ err = -EAGAIN;
+ if (!timeo)
+ goto failure;
+ if (signal_pending(current))
+ goto interrupted;
+ timeo = sock_wait_for_wmem(sk, timeo);
+ continue;
+ }
- /* Full success... */
- break;
- }
- err = -ENOBUFS;
+ err = -ENOBUFS;
+ gfp_mask = sk->sk_allocation;
+ if (gfp_mask & __GFP_WAIT)
+ gfp_mask |= __GFP_REPEAT;
+
+ skb = alloc_skb(header_len, gfp_mask);
+ if (!skb)
goto failure;
+
+ skb->truesize += data_len;
+
+ for (i = 0; npages > 0; i++) {
+ int order = max_page_order;
+
+ while (order) {
+ if (npages >= 1 << order) {
+ page = alloc_pages(sk->sk_allocation |
+ __GFP_COMP | __GFP_NOWARN,
+ order);
+ if (page)
+ goto fill_page;
+ }
+ order--;
+ }
+ page = alloc_page(sk->sk_allocation);
+ if (!page)
+ goto failure;
+fill_page:
+ chunk = min_t(unsigned long, data_len,
+ PAGE_SIZE << order);
+ skb_fill_page_desc(skb, i, page, 0, chunk);
+ data_len -= chunk;
+ npages -= 1 << order;
}
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- err = -EAGAIN;
- if (!timeo)
- goto failure;
- if (signal_pending(current))
- goto interrupted;
- timeo = sock_wait_for_wmem(sk, timeo);
}
skb_set_owner_w(skb, sk);
@@ -1799,6 +1820,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
interrupted:
err = sock_intr_errno(timeo);
failure:
+ kfree_skb(skb);
*errcode = err;
return NULL;
}
@@ -1807,7 +1829,7 @@ EXPORT_SYMBOL(sock_alloc_send_pskb);
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode)
{
- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
+ return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
}
EXPORT_SYMBOL(sock_alloc_send_skb);
@@ -2425,6 +2447,52 @@ void sock_enable_timestamp(struct sock *sk, int flag)
}
}
+int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
+ int level, int type)
+{
+ struct sock_exterr_skb *serr;
+ struct sk_buff *skb, *skb2;
+ int copied, err;
+
+ err = -EAGAIN;
+ skb = skb_dequeue(&sk->sk_error_queue);
+ if (skb == NULL)
+ goto out;
+
+ copied = skb->len;
+ if (copied > len) {
+ msg->msg_flags |= MSG_TRUNC;
+ copied = len;
+ }
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ if (err)
+ goto out_free_skb;
+
+ sock_recv_timestamp(msg, sk, skb);
+
+ serr = SKB_EXT_ERR(skb);
+ put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
+
+ msg->msg_flags |= MSG_ERRQUEUE;
+ err = copied;
+
+ /* Reset and regenerate socket error */
+ spin_lock_bh(&sk->sk_error_queue.lock);
+ sk->sk_err = 0;
+ if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+ sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+ sk->sk_error_report(sk);
+ } else
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+
+out_free_skb:
+ kfree_skb(skb);
+out:
+ return err;
+}
+EXPORT_SYMBOL(sock_recv_errqueue);
+
/*
* Get a socket option on an socket.
*
diff --git a/net/core/stream.c b/net/core/stream.c
index f5df85dcd20b..512f0a24269b 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -30,7 +30,7 @@ void sk_stream_write_space(struct sock *sk)
struct socket *sock = sk->sk_socket;
struct socket_wq *wq;
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
+ if (sk_stream_is_writeable(sk) && sock) {
clear_bit(SOCK_NOSPACE, &sock->flags);
rcu_read_lock();
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 31107abd2783..cca444190907 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -20,6 +20,7 @@
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
+#include <net/pkt_sched.h>
static int zero = 0;
static int one = 1;
@@ -193,6 +194,26 @@ static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
}
#endif /* CONFIG_NET_FLOW_LIMIT */
+#ifdef CONFIG_NET_SCHED
+static int set_default_qdisc(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ char id[IFNAMSIZ];
+ struct ctl_table tbl = {
+ .data = id,
+ .maxlen = IFNAMSIZ,
+ };
+ int ret;
+
+ qdisc_get_default(id, IFNAMSIZ);
+
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ ret = qdisc_set_default(id);
+ return ret;
+}
+#endif
+
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
{
@@ -315,7 +336,14 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
-#
+#endif
+#ifdef CONFIG_NET_SCHED
+ {
+ .procname = "default_qdisc",
+ .mode = 0644,
+ .maxlen = IFNAMSIZ,
+ .proc_handler = set_default_qdisc
+ },
#endif
#endif /* CONFIG_NET */
{