From 2c8c1e7297e19bdef3c178c3ea41d898a7716e3e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 17 Jan 2010 03:35:32 +0000 Subject: net: spread __net_init, __net_exit __net_init/__net_exit are apparently not going away, so use them to full extent. In some cases __net_init was removed, because it was called from __net_exit code. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/packet') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e0516a22be2e..a97acfe7e770 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2448,7 +2448,7 @@ static const struct file_operations packet_seq_fops = { #endif -static int packet_net_init(struct net *net) +static int __net_init packet_net_init(struct net *net) { rwlock_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); @@ -2459,7 +2459,7 @@ static int packet_net_init(struct net *net) return 0; } -static void packet_net_exit(struct net *net) +static void __net_exit packet_net_exit(struct net *net) { proc_net_remove(net, "packet"); } -- cgit v1.2.3 From bfd5f4a3d605e0f6054df0b59fe0907ff7e696d3 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 4 Feb 2010 20:24:10 -0800 Subject: packet: Add GSO/csum offload support. This patch adds GSO/checksum offload to af_packet sockets using virtio_net_hdr. Based on Rusty's patch to add this support to tun. It allows GSO/checksum offload to be enabled when using raw socket backend with virtio_net. Adds PACKET_VNET_HDR socket option to prepend virtio_net_hdr in the receive path and process/skip virtio_net_hdr in the send path. This option is only allowed with SOCK_RAW sockets attached to ethernet type devices. v2 updates ---------- Michael's Comments - Perform length check in packet_snd() when GSO is off even when vnet_hdr is present. 
- Check for SKB_GSO_FCOE type and return -EINVAL - don't allow tx/rx ring when vnet_hdr is enabled. Herbert's Comments - Removed ethernet specific code. - protocol value is assumed to be passed in by the caller. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/packet/af_packet.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 11 deletions(-) (limited to 'net/packet') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 53633c5fdb1d..178e2937bbaa 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -80,6 +80,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -193,7 +194,8 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned int running:1, /* prot_hook is attached*/ auxdata:1, - origdev:1; + origdev:1, + has_vnet_hdr:1; int ifindex; /* bound device */ __be16 num; struct packet_mclist *mclist; @@ -1056,6 +1058,30 @@ out: } #endif +static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, + size_t reserve, size_t len, + size_t linear, int noblock, + int *err) +{ + struct sk_buff *skb; + + /* Under a page? Don't bother with paged skb. */ + if (prepad + len < PAGE_SIZE || !linear) + linear = len; + + skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, + err); + if (!skb) + return NULL; + + skb_reserve(skb, reserve); + skb_put(skb, linear); + skb->data_len = len - linear; + skb->len += len - linear; + + return skb; +} + static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1066,14 +1092,17 @@ static int packet_snd(struct socket *sock, __be16 proto; unsigned char *addr; int ifindex, err, reserve = 0; + struct virtio_net_hdr vnet_hdr = { 0 }; + int offset = 0; + int vnet_hdr_len; + struct packet_sock *po = pkt_sk(sk); + unsigned short gso_type = 0; /* * Get and verify the address. 
*/ if (saddr == NULL) { - struct packet_sock *po = pkt_sk(sk); - ifindex = po->ifindex; proto = po->num; addr = NULL; @@ -1100,25 +1129,74 @@ static int packet_snd(struct socket *sock, if (!(dev->flags & IFF_UP)) goto out_unlock; + if (po->has_vnet_hdr) { + vnet_hdr_len = sizeof(vnet_hdr); + + err = -EINVAL; + if (len < vnet_hdr_len) + goto out_unlock; + + len -= vnet_hdr_len; + + err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov, + vnet_hdr_len); + if (err < 0) + goto out_unlock; + + if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > + vnet_hdr.hdr_len)) + vnet_hdr.hdr_len = vnet_hdr.csum_start + + vnet_hdr.csum_offset + 2; + + err = -EINVAL; + if (vnet_hdr.hdr_len > len) + goto out_unlock; + + if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + gso_type = SKB_GSO_TCPV4; + break; + case VIRTIO_NET_HDR_GSO_TCPV6: + gso_type = SKB_GSO_TCPV6; + break; + case VIRTIO_NET_HDR_GSO_UDP: + gso_type = SKB_GSO_UDP; + break; + default: + goto out_unlock; + } + + if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) + gso_type |= SKB_GSO_TCP_ECN; + + if (vnet_hdr.gso_size == 0) + goto out_unlock; + + } + } + err = -EMSGSIZE; - if (len > dev->mtu+reserve) + if (!gso_type && (len > dev->mtu+reserve)) goto out_unlock; - skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), - msg->msg_flags & MSG_DONTWAIT, &err); + err = -ENOBUFS; + skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev), + LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len, + msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); - skb_reset_network_header(skb); + skb_set_network_header(skb, reserve); err = -EINVAL; if (sock->type == SOCK_DGRAM && - dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) + (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0) goto out_free; /* Returns 
-EFAULT on error */ - err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; @@ -1127,6 +1205,25 @@ static int packet_snd(struct socket *sock, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; + if (po->has_vnet_hdr) { + if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (!skb_partial_csum_set(skb, vnet_hdr.csum_start, + vnet_hdr.csum_offset)) { + err = -EINVAL; + goto out_free; + } + } + + skb_shinfo(skb)->gso_size = vnet_hdr.gso_size; + skb_shinfo(skb)->gso_type = gso_type; + + /* Header must be checked, and gso_segs computed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + + len += vnet_hdr_len; + } + /* * Now send it */ @@ -1420,6 +1517,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; int copied, err; struct sockaddr_ll *sll; + int vnet_hdr_len = 0; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) @@ -1451,6 +1549,48 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb == NULL) goto out; + if (pkt_sk(sk)->has_vnet_hdr) { + struct virtio_net_hdr vnet_hdr = { 0 }; + + err = -EINVAL; + vnet_hdr_len = sizeof(vnet_hdr); + if ((len -= vnet_hdr_len) < 0) + goto out_free; + + if (skb_is_gso(skb)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + /* This is a hint as to how much should be linear. 
*/ + vnet_hdr.hdr_len = skb_headlen(skb); + vnet_hdr.gso_size = sinfo->gso_size; + if (sinfo->gso_type & SKB_GSO_TCPV4) + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else if (sinfo->gso_type & SKB_GSO_TCPV6) + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; + else if (sinfo->gso_type & SKB_GSO_FCOE) + goto out_free; + else + BUG(); + if (sinfo->gso_type & SKB_GSO_TCP_ECN) + vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; + } else + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vnet_hdr.csum_start = skb->csum_start - + skb_headroom(skb); + vnet_hdr.csum_offset = skb->csum_offset; + } /* else everything is zero */ + + err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, + vnet_hdr_len); + if (err < 0) + goto out_free; + } + /* * If the address length field is there to be filled in, we fill * it in now. @@ -1502,7 +1642,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, * Free or return the buffer as appropriate. Again this * hides all the races and re-entrancy issues from us. */ - err = (flags&MSG_TRUNC) ? skb->len : copied; + err = vnet_hdr_len + ((flags&MSG_TRUNC) ? 
skb->len : copied); out_free: skb_free_datagram(sk, skb); @@ -1740,6 +1880,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen < sizeof(req)) return -EINVAL; + if (pkt_sk(sk)->has_vnet_hdr) + return -EINVAL; if (copy_from_user(&req, optval, sizeof(req))) return -EFAULT; return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); @@ -1826,6 +1968,22 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->origdev = !!val; return 0; } + case PACKET_VNET_HDR: + { + int val; + + if (sock->type != SOCK_RAW) + return -EINVAL; + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) + return -EBUSY; + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->has_vnet_hdr = !!val; + return 0; + } default: return -ENOPROTOOPT; } @@ -1874,6 +2032,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, len = sizeof(int); val = po->origdev; + data = &val; + break; + case PACKET_VNET_HDR: + if (len > sizeof(int)) + len = sizeof(int); + val = po->has_vnet_hdr; + data = &val; break; #ifdef CONFIG_PACKET_MMAP -- cgit v1.2.3 From 889b8f964f2f226b7cd5a0a515109e3d8d9d1613 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 5 Feb 2010 16:29:48 -0800 Subject: packet: Kill CONFIG_PACKET_MMAP. Early on this was an experimental facility that few people other than Alexey Kuznetsov played with. Now it's a pretty fundamental thing and as people add more features to AF_PACKET sockets this config options creates ifdef spaghetti. So kill it off. Signed-off-by: David S. Miller --- net/packet/Kconfig | 10 ---------- net/packet/af_packet.c | 29 ----------------------------- 2 files changed, 39 deletions(-) (limited to 'net/packet') diff --git a/net/packet/Kconfig b/net/packet/Kconfig index 34ff93ff894d..0060e3b396b7 100644 --- a/net/packet/Kconfig +++ b/net/packet/Kconfig @@ -14,13 +14,3 @@ config PACKET be called af_packet. 
If unsure, say Y. - -config PACKET_MMAP - bool "Packet socket: mmapped IO" - depends on PACKET - help - If you say Y here, the Packet protocol driver will use an IO - mechanism that results in faster communication. - - If unsure, say N. - diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 178e2937bbaa..6ecb426bc0cf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -157,7 +157,6 @@ struct packet_mreq_max { unsigned char mr_address[MAX_ADDR_LEN]; }; -#ifdef CONFIG_PACKET_MMAP static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing, int tx_ring); @@ -177,7 +176,6 @@ struct packet_ring_buffer { struct packet_sock; static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); -#endif static void packet_flush_mclist(struct sock *sk); @@ -185,11 +183,9 @@ struct packet_sock { /* struct sock has to be the first member of packet_sock */ struct sock sk; struct tpacket_stats stats; -#ifdef CONFIG_PACKET_MMAP struct packet_ring_buffer rx_ring; struct packet_ring_buffer tx_ring; int copy_thresh; -#endif spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned int running:1, /* prot_hook is attached*/ @@ -199,13 +195,11 @@ struct packet_sock { int ifindex; /* bound device */ __be16 num; struct packet_mclist *mclist; -#ifdef CONFIG_PACKET_MMAP atomic_t mapped; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_loss:1; -#endif struct packet_type prot_hook ____cacheline_aligned_in_smp; }; @@ -219,8 +213,6 @@ struct packet_skb_cb { #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) -#ifdef CONFIG_PACKET_MMAP - static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union { @@ -315,8 +307,6 @@ static inline void packet_increment_head(struct packet_ring_buffer *buff) buff->head = buff->head != buff->frame_max ? 
buff->head+1 : 0; } -#endif - static inline struct packet_sock *pkt_sk(struct sock *sk) { return (struct packet_sock *)sk; @@ -640,7 +630,6 @@ drop: return 0; } -#ifdef CONFIG_PACKET_MMAP static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { @@ -1056,7 +1045,6 @@ out: mutex_unlock(&po->pg_vec_lock); return err; } -#endif static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, size_t reserve, size_t len, @@ -1248,13 +1236,11 @@ out: static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { -#ifdef CONFIG_PACKET_MMAP struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); if (po->tx_ring.pg_vec) return tpacket_snd(po, msg); else -#endif return packet_snd(sock, msg, len); } @@ -1268,9 +1254,7 @@ static int packet_release(struct socket *sock) struct sock *sk = sock->sk; struct packet_sock *po; struct net *net; -#ifdef CONFIG_PACKET_MMAP struct tpacket_req req; -#endif if (!sk) return 0; @@ -1299,7 +1283,6 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); -#ifdef CONFIG_PACKET_MMAP memset(&req, 0, sizeof(req)); if (po->rx_ring.pg_vec) @@ -1307,7 +1290,6 @@ static int packet_release(struct socket *sock) if (po->tx_ring.pg_vec) packet_set_ring(sk, &req, 1, 1); -#endif /* * Now the socket is dead. No more input will appear. 
@@ -1872,7 +1854,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return ret; } -#ifdef CONFIG_PACKET_MMAP case PACKET_RX_RING: case PACKET_TX_RING: { @@ -1943,7 +1924,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->tp_loss = !!val; return 0; } -#endif case PACKET_AUXDATA: { int val; @@ -2041,7 +2021,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, data = &val; break; -#ifdef CONFIG_PACKET_MMAP case PACKET_VERSION: if (len > sizeof(int)) len = sizeof(int); @@ -2077,7 +2056,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_loss; data = &val; break; -#endif default: return -ENOPROTOOPT; } @@ -2197,11 +2175,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, return 0; } -#ifndef CONFIG_PACKET_MMAP -#define packet_mmap sock_no_mmap -#define packet_poll datagram_poll -#else - static unsigned int packet_poll(struct file *file, struct socket *sock, poll_table *wait) { @@ -2483,8 +2456,6 @@ out: mutex_unlock(&po->pg_vec_lock); return err; } -#endif - static const struct proto_ops packet_ops_spkt = { .family = PF_PACKET, -- cgit v1.2.3 From b7ceabd9b528417973619c5b655bc5b21857ac36 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 8 Feb 2010 23:19:29 +0000 Subject: net: packet: use seq_hlist_foo() helpers Simplify seq_file code. Signed-off-by: Li Zefan Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'net/packet') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6ecb426bc0cf..10f7295bcefb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2510,33 +2510,19 @@ static struct notifier_block packet_netdev_notifier = { }; #ifdef CONFIG_PROC_FS -static inline struct sock *packet_seq_idx(struct net *net, loff_t off) -{ - struct sock *s; - struct hlist_node *node; - - sk_for_each(s, node, &net->packet.sklist) { - if (!off--) - return s; - } - return NULL; -} static void *packet_seq_start(struct seq_file *seq, loff_t *pos) __acquires(seq_file_net(seq)->packet.sklist_lock) { struct net *net = seq_file_net(seq); read_lock(&net->packet.sklist_lock); - return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN; + return seq_hlist_start_head(&net->packet.sklist, *pos); } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); - ++*pos; - return (v == SEQ_START_TOKEN) - ? sk_head(&net->packet.sklist) - : sk_next((struct sock *)v) ; + return seq_hlist_next(v, &net->packet.sklist, pos); } static void packet_seq_stop(struct seq_file *seq, void *v) @@ -2551,7 +2537,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n"); else { - struct sock *s = v; + struct sock *s = sk_entry(v); const struct packet_sock *po = pkt_sk(s); seq_printf(seq, -- cgit v1.2.3 From 808f5114a9206fee855117d416440e1071ab375c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 22 Feb 2010 07:57:18 +0000 Subject: packet: convert socket list to RCU (v3) Convert AF_PACKET to use RCU, eliminating one more reader/writer lock. 
There is no need for a real sk_del_node_init_rcu(), because sk_del_node_init is doing the equivalent thing to hlist_del_init_rcu already; but added some comments to try and make that obvious. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/packet/af_packet.c | 62 +++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'net/packet') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 10f7295bcefb..2f0369367ee0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1262,24 +1262,22 @@ static int packet_release(struct socket *sock) net = sock_net(sk); po = pkt_sk(sk); - write_lock_bh(&net->packet.sklist_lock); - sk_del_node_init(sk); + spin_lock_bh(&net->packet.sklist_lock); + sk_del_node_init_rcu(sk); sock_prot_inuse_add(net, sk->sk_prot, -1); - write_unlock_bh(&net->packet.sklist_lock); - - /* - * Unhook packet receive handler. - */ + spin_unlock_bh(&net->packet.sklist_lock); + spin_lock(&po->bind_lock); if (po->running) { /* - * Remove the protocol hook + * Remove from protocol table */ - dev_remove_pack(&po->prot_hook); po->running = 0; po->num = 0; + __dev_remove_pack(&po->prot_hook); __sock_put(sk); } + spin_unlock(&po->bind_lock); packet_flush_mclist(sk); @@ -1291,10 +1289,10 @@ static int packet_release(struct socket *sock) if (po->tx_ring.pg_vec) packet_set_ring(sk, &req, 1, 1); + synchronize_net(); /* * Now the socket is dead. No more input will appear. 
*/ - sock_orphan(sk); sock->sk = NULL; @@ -1478,10 +1476,11 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->running = 1; } - write_lock_bh(&net->packet.sklist_lock); - sk_add_node(sk, &net->packet.sklist); + spin_lock_bh(&net->packet.sklist_lock); + sk_add_node_rcu(sk, &net->packet.sklist); sock_prot_inuse_add(net, &packet_proto, 1); - write_unlock_bh(&net->packet.sklist_lock); + spin_unlock_bh(&net->packet.sklist_lock); + return 0; out: return err; @@ -2075,8 +2074,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = data; struct net *net = dev_net(dev); - read_lock(&net->packet.sklist_lock); - sk_for_each(sk, node, &net->packet.sklist) { + rcu_read_lock(); + sk_for_each_rcu(sk, node, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -2104,18 +2103,19 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void } break; case NETDEV_UP: - spin_lock(&po->bind_lock); - if (dev->ifindex == po->ifindex && po->num && - !po->running) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; + if (dev->ifindex == po->ifindex) { + spin_lock(&po->bind_lock); + if (po->num && !po->running) { + dev_add_pack(&po->prot_hook); + sock_hold(sk); + po->running = 1; + } + spin_unlock(&po->bind_lock); } - spin_unlock(&po->bind_lock); break; } } - read_unlock(&net->packet.sklist_lock); + rcu_read_unlock(); return NOTIFY_DONE; } @@ -2512,24 +2512,24 @@ static struct notifier_block packet_netdev_notifier = { #ifdef CONFIG_PROC_FS static void *packet_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(seq_file_net(seq)->packet.sklist_lock) + __acquires(RCU) { struct net *net = seq_file_net(seq); - read_lock(&net->packet.sklist_lock); - return seq_hlist_start_head(&net->packet.sklist, *pos); + + rcu_read_lock(); + return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); } static void *packet_seq_next(struct seq_file *seq, void 
*v, loff_t *pos) { struct net *net = seq_file_net(seq); - return seq_hlist_next(v, &net->packet.sklist, pos); + return seq_hlist_next_rcu(v, &net->packet.sklist, pos); } static void packet_seq_stop(struct seq_file *seq, void *v) - __releases(seq_file_net(seq)->packet.sklist_lock) + __releases(RCU) { - struct net *net = seq_file_net(seq); - read_unlock(&net->packet.sklist_lock); + rcu_read_unlock(); } static int packet_seq_show(struct seq_file *seq, void *v) @@ -2581,7 +2581,7 @@ static const struct file_operations packet_seq_fops = { static int __net_init packet_net_init(struct net *net) { - rwlock_init(&net->packet.sklist_lock); + spin_lock_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) -- cgit v1.2.3 From 914c8ad2d18b62ad1420f518c0cab0b0b90ab308 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 24 Feb 2010 23:57:04 +0000 Subject: af_packet: do not accept mc address smaller than dev->addr_len in packet_mc_add() There is no point in accepting an address of smaller length than dev->addr_len here. Therefore change this to a stronger check. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/packet') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2f0369367ee0..e2d1def70841 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1734,7 +1734,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) goto done; err = -EINVAL; - if (mreq->mr_alen > dev->addr_len) + if (mreq->mr_alen != dev->addr_len) goto done; err = -ENOBUFS; -- cgit v1.2.3 From 1162563f82b434e3099c9e6c1bbdba846d792f0d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 2 Mar 2010 20:40:01 +0000 Subject: af_packet: move strict addr_len check right before dev_[mc/unicast]_[add/del] My previous patch 914c8ad2d18b62ad1420f518c0cab0b0b90ab308 incorrectly changed the length check in packet_mc_add to be more strict. The problem is that userspace is not filling this field (and it stays zeroed) in case of setting PACKET_MR_PROMISC or PACKET_MR_ALLMULTI. So move the strict check to the point in path where the addr_len must be set correctly. Signed-off-by: Jiri Pirko Reported-by: Pavel Roskin Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/packet') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 031a5e6fb4aa..1612d417d10c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1688,6 +1688,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, { switch (i->type) { case PACKET_MR_MULTICAST: + if (i->alen != dev->addr_len) + return -EINVAL; if (what > 0) return dev_mc_add(dev, i->addr, i->alen, 0); else @@ -1700,6 +1702,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, return dev_set_allmulti(dev, what); break; case PACKET_MR_UNICAST: + if (i->alen != dev->addr_len) + return -EINVAL; if (what > 0) return dev_unicast_add(dev, i->addr); else @@ -1734,7 +1738,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) goto done; err = -EINVAL; - if (mreq->mr_alen != dev->addr_len) + if (mreq->mr_alen > dev->addr_len) goto done; err = -ENOBUFS; -- cgit v1.2.3 From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. 
* Scan files for gfp and slab usages and update includes such that only the necessary includes are there. i.e., if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and tries to put the new include such that its order conforms to its surroundings. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have a fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually widely available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. 
Build tests were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as a bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- net/packet/af_packet.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/packet') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1612d417d10c..cc90363d7e7a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 1c4f0197323254e463b642abf2c8361e2a924859 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 14 Apr 2010 23:11:14 +0000 Subject: packet : remove init_net restriction The af_packet protocol is used by Perl to do ioctls as reported by Stephane Riviere: "Net::RawIP relies on SIOCGIFADDR et SIOCGIFHWADDR to get the IP and MAC addresses of the network interface." But in a new network namespace these ioctls fail because they are disabled for a namespace different from the init_net_ns. 
These two lines should not be there as af_inet and af_packet have been namespace aware for a long time now. I suppose we forgot to remove these lines because we sent the af_packet changes first, before af_inet was supported. Signed-off-by: Daniel Lezcano Reported-by: Stephane Riviere Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/packet') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index cc90363d7e7a..243946d4809d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2169,8 +2169,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: - if (!net_eq(sock_net(sk), &init_net)) - return -ENOIOCTLCMD; return inet_dgram_ops.ioctl(sock, cmd, arg); #endif -- cgit v1.2.3