diff options
Diffstat (limited to 'net')
458 files changed, 8591 insertions, 3723 deletions
diff --git a/net/802/fc.c b/net/802/fc.c index 282c4ab1abe6..2a27e37bc4cb 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -10,7 +10,6 @@ * v 1.0 03/22/99 */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/types.h> diff --git a/net/802/fddi.c b/net/802/fddi.c index ac242a4bc346..797c6d961deb 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -26,7 +26,6 @@ * Maciej W. Rozycki : IPv6 support */ -#include <linux/config.h> #include <linux/module.h> #include <asm/system.h> #include <linux/types.h> diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c index 700129556c13..ead56037398b 100644 --- a/net/802/sysctl_net_802.c +++ b/net/802/sysctl_net_802.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/if_tr.h> #include <linux/sysctl.h> diff --git a/net/802/tr.c b/net/802/tr.c index e9dc803f2fe0..d7d8f40c4fed 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -17,7 +17,6 @@ #include <asm/uaccess.h> #include <asm/system.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 3948949a609a..458031bfff55 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -364,6 +364,14 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev } } +/* + * vlan network devices have devices nesting below it, and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key vlan_netdev_xmit_lock_key; + + /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns the device that was created, or NULL if there was * an error of some kind. @@ -460,6 +468,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup); + if (new_dev == NULL) goto out_unlock; @@ -518,6 +527,8 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, if (register_netdevice(new_dev)) goto out_free_newdev; + lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key); + new_dev->iflink = real_dev->ifindex; vlan_transfer_operstate(real_dev, new_dev); linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 7b214cffc956..a8fc0de1f969 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -17,7 +17,6 @@ * Jan 20, 1998 Ben Greear Initial Version *****************************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/stddef.h> /* offsetof(), etc. */ #include <linux/errno.h> /* return codes */ diff --git a/net/Kconfig b/net/Kconfig index 4193cdcd3ae7..c6cec5aa5486 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -66,6 +66,13 @@ source "net/ipv6/Kconfig" endif # if INET +config NETWORK_SECMARK + bool "Security Marking" + help + This enables security marking of network packets, similar + to nfmark, but designated for security purposes. + If you are unsure how to answer this question, answer N. + menuconfig NETFILTER bool "Network packet filtering (replaces ipchains)" ---help--- @@ -215,6 +222,21 @@ config NET_PKTGEN To compile this code as a module, choose M here: the module will be called pktgen. +config NET_TCPPROBE + tristate "TCP connection probing" + depends on INET && EXPERIMENTAL && PROC_FS && KPROBES + ---help--- + This module allows for capturing the changes to TCP connection + state in response to incoming packets. It is used for debugging + TCP congestion avoidance modules. If you don't understand + what was just said, you don't need it: say N. + + Documentation on how to use the packet generator can be found + at http://linux-net.osdl.org/index.php/TcpProbe + + To compile this code as a module, choose M here: the + module will be called tcp_probe. + endmenu endmenu diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 7076097debc2..f3777ec5bcb9 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -29,7 +29,6 @@ * */ -#include <linux/config.h> #include <linux/if_arp.h> #include <net/sock.h> #include <net/datalink.h> diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index dc4048dd98c1..7ae4916cd26d 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -8,7 +8,6 @@ * Free Software Foundation, version 2. */ -#include <linux/config.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7b1eb9a4fc96..5ee96d4b40e9 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -51,7 +51,6 @@ * */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/if_arp.h> diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index af7f0604395d..40b0af7437a2 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -6,7 +6,6 @@ * Dynamic registration, added aarp entries. (5/30/97 Chris Horn) */ -#include <linux/config.h> #include <linux/sysctl.h> #include <net/sock.h> #include <linux/atalk.h> diff --git a/net/atm/Makefile b/net/atm/Makefile index d5818751f6ba..89656d6c0b90 100644 --- a/net/atm/Makefile +++ b/net/atm/Makefile @@ -2,7 +2,7 @@ # Makefile for the ATM Protocol Families. # -atm-y := addr.o pvc.o signaling.o svc.o ioctl.o common.o atm_misc.o raw.o resources.o +atm-y := addr.o pvc.o signaling.o svc.o ioctl.o common.o atm_misc.o raw.o resources.o atm_sysfs.o mpoa-objs := mpc.o mpoa_caches.o mpoa_proc.o obj-$(CONFIG_ATM) += atm.o diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c new file mode 100644 index 000000000000..5df4b9a068bb --- /dev/null +++ b/net/atm/atm_sysfs.c @@ -0,0 +1,176 @@ +/* ATM driver model support. */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/kobject.h> +#include <linux/atmdev.h> +#include "common.h" +#include "resources.h" + +#define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev) + +static ssize_t show_type(struct class_device *cdev, char *buf) +{ + struct atm_dev *adev = to_atm_dev(cdev); + return sprintf(buf, "%s\n", adev->type); +} + +static ssize_t show_address(struct class_device *cdev, char *buf) +{ + char *pos = buf; + struct atm_dev *adev = to_atm_dev(cdev); + int i; + + for (i = 0; i < (ESI_LEN - 1); i++) + pos += sprintf(pos, "%02x:", adev->esi[i]); + pos += sprintf(pos, "%02x\n", adev->esi[i]); + + return pos - buf; +} + +static ssize_t show_atmaddress(struct class_device *cdev, char *buf) +{ + unsigned long flags; + char *pos = buf; + struct atm_dev *adev = to_atm_dev(cdev); + struct atm_dev_addr *aaddr; + int bin[] = { 1, 2, 10, 6, 1 }, *fmt = bin; + int i, j; + + spin_lock_irqsave(&adev->lock, flags); + list_for_each_entry(aaddr, &adev->local, entry) { + for(i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) { + if (j == *fmt) { + pos += sprintf(pos, "."); + ++fmt; + j = 0; + } + pos += sprintf(pos, "%02x", aaddr->addr.sas_addr.prv[i]); + } + pos += sprintf(pos, "\n"); + } + spin_unlock_irqrestore(&adev->lock, flags); + + return pos - buf; +} + +static ssize_t show_carrier(struct class_device *cdev, char *buf) +{ + char *pos = buf; + struct atm_dev *adev = to_atm_dev(cdev); + + pos += sprintf(pos, "%d\n", + adev->signal == ATM_PHY_SIG_LOST ? 0 : 1); + + return pos - buf; +} + +static ssize_t show_link_rate(struct class_device *cdev, char *buf) +{ + char *pos = buf; + struct atm_dev *adev = to_atm_dev(cdev); + int link_rate; + + /* show the link rate, not the data rate */ + switch (adev->link_rate) { + case ATM_OC3_PCR: + link_rate = 155520000; + break; + case ATM_OC12_PCR: + link_rate = 622080000; + break; + case ATM_25_PCR: + link_rate = 25600000; + break; + default: + link_rate = adev->link_rate * 8 * 53; + } + pos += sprintf(pos, "%d\n", link_rate); + + return pos - buf; +} + +static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); +static CLASS_DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL); +static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL); +static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL); +static CLASS_DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL); + +static struct class_device_attribute *atm_attrs[] = { + &class_device_attr_atmaddress, + &class_device_attr_address, + &class_device_attr_carrier, + &class_device_attr_type, + &class_device_attr_link_rate, + NULL +}; + +static int atm_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) +{ + struct atm_dev *adev; + int i = 0, len = 0; + + if (!cdev) + return -ENODEV; + + adev = to_atm_dev(cdev); + if (!adev) + return -ENODEV; + + if (add_uevent_var(envp, num_envp, &i, buf, size, &len, + "NAME=%s%d", adev->type, adev->number)) + return -ENOMEM; + + envp[i] = NULL; + return 0; +} + +static void atm_release(struct class_device *cdev) +{ + struct atm_dev *adev = to_atm_dev(cdev); + + kfree(adev); +} + +static struct class atm_class = { + .name = "atm", + .release = atm_release, + .uevent = atm_uevent, +}; + +int atm_register_sysfs(struct atm_dev *adev) +{ + struct class_device *cdev = &adev->class_dev; + int i, err; + + cdev->class = &atm_class; + class_set_devdata(cdev, adev); + + snprintf(cdev->class_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number); + err = class_device_register(cdev); + if (err < 0) + return err; + + for (i = 0; atm_attrs[i]; i++) + class_device_create_file(cdev, atm_attrs[i]); + + return 0; +} + +void atm_unregister_sysfs(struct atm_dev *adev) +{ + struct class_device *cdev = &adev->class_dev; + + class_device_del(cdev); +} + +int __init atm_sysfs_init(void) +{ + return class_register(&atm_class); +} + +void __exit atm_sysfs_exit(void) +{ + class_unregister(&atm_class); +} diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 680ccb12aae8..a487233dc466 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -5,7 +5,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hungary */ #include <linux/module.h> -#include <linux/config.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> diff --git a/net/atm/clip.c b/net/atm/clip.c index 72d852982664..2e62105d91bd 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -2,7 +2,6 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ -#include <linux/config.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/kernel.h> /* for UINT_MAX */ @@ -24,6 +23,7 @@ #include <linux/if.h> /* for IFF_UP */ #include <linux/inetdevice.h> #include <linux/bitops.h> +#include <linux/poison.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> @@ -98,7 +98,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc) printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n", clip_vcc); return; } - spin_lock_bh(&entry->neigh->dev->xmit_lock); /* block clip_start_xmit() */ + netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */ entry->neigh->used = jiffies; for (walk = &entry->vccs; *walk; walk = &(*walk)->next) if (*walk == clip_vcc) { @@ -122,7 +122,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc) printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc " "0x%p)\n", entry, clip_vcc); out: - spin_unlock_bh(&entry->neigh->dev->xmit_lock); + netif_tx_unlock_bh(entry->neigh->dev); } /* The neighbour entry n->lock is held. */ @@ -267,7 +267,7 @@ static void clip_neigh_destroy(struct neighbour *neigh) DPRINTK("clip_neigh_destroy (neigh %p)\n", neigh); if (NEIGH2ENTRY(neigh)->vccs) printk(KERN_CRIT "clip_neigh_destroy: vccs != NULL !!!\n"); - NEIGH2ENTRY(neigh)->vccs = (void *) 0xdeadbeef; + NEIGH2ENTRY(neigh)->vccs = (void *) NEIGHBOR_DEAD; } static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb) @@ -962,7 +962,6 @@ static struct file_operations arp_seq_fops = { static int __init atm_clip_init(void) { - struct proc_dir_entry *p; neigh_table_init_no_netlink(&clip_tbl); clip_tbl_hook = &clip_tbl; @@ -972,9 +971,15 @@ static int __init atm_clip_init(void) setup_timer(&idle_timer, idle_timer_check, 0); - p = create_proc_entry("arp", S_IRUGO, atm_proc_root); - if (p) - p->proc_fops = &arp_seq_fops; +#ifdef CONFIG_PROC_FS + { + struct proc_dir_entry *p; + + p = create_proc_entry("arp", S_IRUGO, atm_proc_root); + if (p) + p->proc_fops = &arp_seq_fops; + } +#endif return 0; } diff --git a/net/atm/common.c b/net/atm/common.c index ae002220fa99..fbabff494468 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -3,7 +3,6 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kmod.h> #include <linux/net.h> /* struct socket, struct proto_ops */ @@ -791,8 +790,14 @@ static int __init atm_init(void) printk(KERN_ERR "atm_proc_init() failed with %d\n",error); goto out_atmsvc_exit; } + if ((error = atm_sysfs_init()) < 0) { + printk(KERN_ERR "atm_sysfs_init() failed with %d\n",error); + goto out_atmproc_exit; + } out: return error; +out_atmproc_exit: + atm_proc_exit(); out_atmsvc_exit: atmsvc_exit(); out_atmpvc_exit: @@ -805,6 +810,7 @@ out_unregister_vcc_proto: static void __exit atm_exit(void) { atm_proc_exit(); + atm_sysfs_exit(); atmsvc_exit(); atmpvc_exit(); proto_unregister(&vcc_proto); diff --git a/net/atm/common.h b/net/atm/common.h index 4887c317cefe..a422da7788fb 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -28,6 +28,8 @@ int atmpvc_init(void); void atmpvc_exit(void); int atmsvc_init(void); void atmsvc_exit(void); +int atm_sysfs_init(void); +void atm_sysfs_exit(void); #ifdef CONFIG_PROC_FS int atm_proc_init(void); diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 851cfa6312af..8c2022c3e81d 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -4,7 +4,6 @@ /* 2003 John Levon <levon@movementarian.org> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kmod.h> #include <linux/net.h> /* struct socket, struct proto_ops */ diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c index 4b1faca5013f..1d3de42fada0 100644 --- a/net/atm/ipcommon.c +++ b/net/atm/ipcommon.c @@ -25,22 +25,27 @@ /* * skb_migrate appends the list at "from" to "to", emptying "from" in the * process. skb_migrate is atomic with respect to all other skb operations on - * "from" and "to". Note that it locks both lists at the same time, so beware - * of potential deadlocks. + * "from" and "to". Note that it locks both lists at the same time, so to deal + * with the lock ordering, the locks are taken in address order. * * This function should live in skbuff.c or skbuff.h. */ -void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) +void skb_migrate(struct sk_buff_head *from, struct sk_buff_head *to) { unsigned long flags; struct sk_buff *skb_from = (struct sk_buff *) from; struct sk_buff *skb_to = (struct sk_buff *) to; struct sk_buff *prev; - spin_lock_irqsave(&from->lock,flags); - spin_lock(&to->lock); + if ((unsigned long) from < (unsigned long) to) { + spin_lock_irqsave(&from->lock, flags); + spin_lock_nested(&to->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_irqsave(&to->lock, flags); + spin_lock_nested(&from->lock, SINGLE_DEPTH_NESTING); + } prev = from->prev; from->next->prev = to->prev; prev->next = skb_to; @@ -51,7 +56,7 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) from->prev = skb_from; from->next = skb_from; from->qlen = 0; - spin_unlock_irqrestore(&from->lock,flags); + spin_unlock_irqrestore(&from->lock, flags); } diff --git a/net/atm/lec.c b/net/atm/lec.c index c4fc722fef9a..4b68a18171cf 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -4,7 +4,6 @@ * */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/capability.h> diff --git a/net/atm/lec.h b/net/atm/lec.h index 6606082b29a8..c22a8bfa1f81 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -9,7 +9,6 @@ #ifndef _LEC_H_ #define _LEC_H_ -#include <linux/config.h> #include <linux/atmdev.h> #include <linux/netdevice.h> #include <linux/atmlec.h> diff --git a/net/atm/mpc.c b/net/atm/mpc.c index c304ef1513b9..9aafe1e2f048 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -25,7 +25,6 @@ #include <linux/atmlec.h> #include <linux/atmmpc.h> /* Modular too */ -#include <linux/config.h> #include <linux/module.h> #include "lec.h" @@ -229,20 +228,15 @@ int atm_mpoa_delete_qos(struct atm_mpoa_qos *entry) /* this is buggered - we need locking for qos_head */ void atm_mpoa_disp_qos(struct seq_file *m) { - unsigned char *ip; - char ipaddr[16]; struct atm_mpoa_qos *qos; qos = qos_head; seq_printf(m, "QoS entries for shortcuts:\n"); seq_printf(m, "IP address\n TX:max_pcr pcr min_pcr max_cdv max_sdu\n RX:max_pcr pcr min_pcr max_cdv max_sdu\n"); - ipaddr[sizeof(ipaddr)-1] = '\0'; while (qos != NULL) { - ip = (unsigned char *)&qos->ipaddr; - sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(ip)); seq_printf(m, "%u.%u.%u.%u\n %-7d %-7d %-7d %-7d %-7d\n %-7d %-7d %-7d %-7d %-7d\n", - NIPQUAD(ipaddr), + NIPQUAD(qos->ipaddr), qos->qos.txtp.max_pcr, qos->qos.txtp.pcr, qos->qos.txtp.min_pcr, qos->qos.txtp.max_cdv, qos->qos.txtp.max_sdu, qos->qos.rxtp.max_pcr, qos->qos.rxtp.pcr, qos->qos.rxtp.min_pcr, qos->qos.rxtp.max_cdv, qos->qos.rxtp.max_sdu); qos = qos->next; @@ -1083,7 +1077,6 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_client *client, in_cache_entry *entry) { uint32_t dst_ip = msg->content.in_info.in_dst_ip; - unsigned char *ip __attribute__ ((unused)) = (unsigned char *)&dst_ip; struct atm_mpoa_qos *qos = atm_mpoa_search_qos(dst_ip); eg_cache_entry *eg_entry = client->eg_ops->get_by_src_ip(dst_ip, client); @@ -1097,7 +1090,7 @@ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_clien entry->shortcut = eg_entry->shortcut; } if(entry->shortcut){ - dprintk("mpoa: (%s) using egress SVC to reach %u.%u.%u.%u\n",client->dev->name, NIPQUAD(ip)); + dprintk("mpoa: (%s) using egress SVC to reach %u.%u.%u.%u\n",client->dev->name, NIPQUAD(dst_ip)); client->eg_ops->put(eg_entry); return; } @@ -1119,12 +1112,10 @@ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_clien static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) { - unsigned char *ip; - uint32_t dst_ip = msg->content.in_info.in_dst_ip; in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc); - ip = (unsigned char *)&dst_ip; - dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(ip)); + + dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(dst_ip)); ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", mpc->dev->name, entry); if(entry == NULL){ printk("\nmpoa: (%s) ARGH, received res. reply for an entry that doesn't exist.\n", mpc->dev->name); diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 64ddebb64060..781ed1b9329d 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -223,7 +223,6 @@ static void in_cache_remove_entry(in_cache_entry *entry, but an easy one... */ static void clear_count_and_expired(struct mpoa_client *client) { - unsigned char *ip; in_cache_entry *entry, *next_entry; struct timeval now; @@ -236,8 +235,7 @@ static void clear_count_and_expired(struct mpoa_client *client) next_entry = entry->next; if((now.tv_sec - entry->tv.tv_sec) > entry->ctrl_info.holding_time){ - ip = (unsigned char*)&entry->ctrl_info.in_dst_ip; - dprintk("mpoa: mpoa_caches.c: holding time expired, ip = %u.%u.%u.%u\n", NIPQUAD(ip)); + dprintk("mpoa: mpoa_caches.c: holding time expired, ip = %u.%u.%u.%u\n", NIPQUAD(entry->ctrl_info.in_dst_ip)); client->in_ops->remove_entry(entry, client); } entry = next_entry; @@ -455,7 +453,6 @@ static void eg_cache_remove_entry(eg_cache_entry *entry, static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_client *client) { - unsigned char *ip; eg_cache_entry *entry = kmalloc(sizeof(eg_cache_entry), GFP_KERNEL); if (entry == NULL) { @@ -463,8 +460,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_cli return NULL; } - ip = (unsigned char *)&msg->content.eg_info.eg_dst_ip; - dprintk("mpoa: mpoa_caches.c: adding an egress entry, ip = %u.%u.%u.%u, this should be our IP\n", NIPQUAD(ip)); + dprintk("mpoa: mpoa_caches.c: adding an egress entry, ip = %u.%u.%u.%u, this should be our IP\n", NIPQUAD(msg->content.eg_info.eg_dst_ip)); memset(entry, 0, sizeof(eg_cache_entry)); atomic_set(&entry->use, 1); @@ -481,8 +477,8 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_cli do_gettimeofday(&(entry->tv)); entry->entry_state = EGRESS_RESOLVED; dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry cache_id %lu\n", ntohl(entry->ctrl_info.cache_id)); - ip = (unsigned char *)&entry->ctrl_info.mps_ip; - dprintk("mpoa: mpoa_caches.c: mps_ip = %u.%u.%u.%u\n", NIPQUAD(ip)); + dprintk("mpoa: mpoa_caches.c: mps_ip = %u.%u.%u.%u\n", + NIPQUAD(entry->ctrl_info.mps_ip)); atomic_inc(&entry->use); write_unlock_irq(&client->egress_lock); diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 60834b5a14d6..d37b8911b3ab 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -1,4 +1,3 @@ -#include <linux/config.h> #ifdef CONFIG_PROC_FS #include <linux/errno.h> diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 1489067c1e84..76a7d8ff6c0e 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -34,7 +34,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/init.h> #include <linux/skbuff.h> #include <linux/atm.h> diff --git a/net/atm/proc.c b/net/atm/proc.c index 4041054e5282..3f95b0886a6a 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -8,7 +8,6 @@ * the reader. */ -#include <linux/config.h> #include <linux/module.h> /* for EXPORT_SYMBOL */ #include <linux/string.h> #include <linux/types.h> diff --git a/net/atm/pvc.c b/net/atm/pvc.c index f2c541774dcd..b2148b43a426 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -3,7 +3,6 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ -#include <linux/config.h> #include <linux/net.h> /* struct socket, struct proto_ops */ #include <linux/atm.h> /* ATM stuff */ #include <linux/atmdev.h> /* ATM devices */ diff --git a/net/atm/resources.c b/net/atm/resources.c index 18ac80698f83..de25c6408b04 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -8,7 +8,6 @@ * use the default destruct function initialized by sock_init_data */ -#include <linux/config.h> #include <linux/ctype.h> #include <linux/string.h> #include <linux/atmdev.h> @@ -114,14 +113,27 @@ struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops, printk(KERN_ERR "atm_dev_register: " "atm_proc_dev_register failed for dev %s\n", type); - mutex_unlock(&atm_dev_mutex); - kfree(dev); - return NULL; + goto out_fail; + } + + if (atm_register_sysfs(dev) < 0) { + printk(KERN_ERR "atm_dev_register: " + "atm_register_sysfs failed for dev %s\n", + type); + atm_proc_dev_deregister(dev); + goto out_fail; } + list_add_tail(&dev->dev_list, &atm_devs); - mutex_unlock(&atm_dev_mutex); +out: + mutex_unlock(&atm_dev_mutex); return dev; + +out_fail: + kfree(dev); + dev = NULL; + goto out; } @@ -140,6 +152,7 @@ void atm_dev_deregister(struct atm_dev *dev) mutex_unlock(&atm_dev_mutex); atm_dev_release_vccs(dev); + atm_unregister_sysfs(dev); atm_proc_dev_deregister(dev); atm_dev_put(dev); diff --git a/net/atm/resources.h b/net/atm/resources.h index ac7222fee7a8..1d004aaaeec1 100644 --- a/net/atm/resources.h +++ b/net/atm/resources.h @@ -6,7 +6,6 @@ #ifndef NET_ATM_RESOURCES_H #define NET_ATM_RESOURCES_H -#include <linux/config.h> #include <linux/atmdev.h> #include <linux/mutex.h> @@ -43,4 +42,6 @@ static inline void atm_proc_dev_deregister(struct atm_dev *dev) #endif /* CONFIG_PROC_FS */ +int atm_register_sysfs(struct atm_dev *adev); +void atm_unregister_sysfs(struct atm_dev *adev); #endif diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a2e0dd047e9f..000695c48583 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -13,7 +13,6 @@ * Copyright (C) Hans Alblas PE1AYX (hans@esrac.ele.tue.nl) * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/errno.h> @@ -146,7 +145,7 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi, ax25_cb *s; struct hlist_node *node; - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(s, node, &ax25_list) { if ((s->iamdigi && !digi) || (!s->iamdigi && digi)) continue; @@ -155,12 +154,12 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi, /* If device is null we match any device */ if (s->ax25_dev == NULL || s->ax25_dev->dev == dev) { sock_hold(s->sk); - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); return s->sk; } } } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); return NULL; } @@ -175,7 +174,7 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, ax25_cb *s; struct hlist_node *node; - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(s, node, &ax25_list) { if (s->sk && !ax25cmp(&s->source_addr, my_addr) && !ax25cmp(&s->dest_addr, dest_addr) && @@ -186,7 +185,7 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, } } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); return sk; } @@ -236,7 +235,7 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto) struct sk_buff *copy; struct hlist_node *node; - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(s, node, &ax25_list) { if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 && s->sk->sk_type == SOCK_RAW && @@ -249,7 +248,7 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto) kfree_skb(copy); } } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); } /* @@ -487,10 +486,9 @@ ax25_cb *ax25_create_cb(void) { ax25_cb *ax25; - if ((ax25 = kmalloc(sizeof(*ax25), GFP_ATOMIC)) == NULL) + if ((ax25 = kzalloc(sizeof(*ax25), GFP_ATOMIC)) == NULL) return NULL; - memset(ax25, 0x00, sizeof(*ax25)); atomic_set(&ax25->refcount, 1); skb_queue_head_init(&ax25->write_queue); diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index dab77efe34a6..b787678220ff 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -6,7 +6,6 @@ * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -56,15 +55,13 @@ void ax25_dev_device_up(struct net_device *dev) { ax25_dev *ax25_dev; - if ((ax25_dev = kmalloc(sizeof(*ax25_dev), GFP_ATOMIC)) == NULL) { + if ((ax25_dev = kzalloc(sizeof(*ax25_dev), GFP_ATOMIC)) == NULL) { printk(KERN_ERR "AX.25: ax25_dev_device_up - out of memory\n"); return; } ax25_unregister_sysctl(); - memset(ax25_dev, 0x00, sizeof(*ax25_dev)); - dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; dev_hold(dev); diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c index 1d4ab641f82b..4d22d4430ec8 100644 --- a/net/ax25/ax25_ds_subr.c +++ b/net/ax25/ax25_ds_subr.c @@ -80,7 +80,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25) ax25_start_t3timer(ax25); ax25_ds_set_timer(ax25->ax25_dev); - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(ax25o, node, &ax25_list) { if (ax25o == ax25) continue; @@ -106,7 +106,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25) if (ax25o->state != AX25_STATE_0) ax25_start_t3timer(ax25o); } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); } void ax25_ds_establish_data_link(ax25_cb *ax25) @@ -162,13 +162,13 @@ static int ax25_check_dama_slave(ax25_dev *ax25_dev) int res = 0; struct hlist_node *node; - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(ax25, node, &ax25_list) if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) { res = 1; break; } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); return res; } diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 5961459935eb..4f44185955c7 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -85,7 +85,7 @@ static void ax25_ds_timeout(unsigned long arg) return; } - spin_lock_bh(&ax25_list_lock); + spin_lock(&ax25_list_lock); ax25_for_each(ax25, node, &ax25_list) { if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE)) continue; @@ -93,7 +93,7 @@ static void ax25_ds_timeout(unsigned long arg) ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); ax25_disconnect(ax25, ETIMEDOUT); } - spin_unlock_bh(&ax25_list_lock); + spin_unlock(&ax25_list_lock); ax25_dev_dama_off(ax25_dev); } diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 3bb152710b77..07ac0207eb69 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -6,7 +6,6 @@ * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -67,10 +66,10 @@ int ax25_protocol_register(unsigned int pid, protocol->pid = pid; protocol->func = func; - write_lock(&protocol_list_lock); + write_lock_bh(&protocol_list_lock); protocol->next = protocol_list; protocol_list = protocol; - write_unlock(&protocol_list_lock); + write_unlock_bh(&protocol_list_lock); return 1; } @@ -81,16 +80,16 @@ void ax25_protocol_release(unsigned int pid) { struct protocol_struct *s, *protocol; - write_lock(&protocol_list_lock); + write_lock_bh(&protocol_list_lock); protocol = protocol_list; if (protocol == NULL) { - write_unlock(&protocol_list_lock); + write_unlock_bh(&protocol_list_lock); return; } if (protocol->pid == pid) { protocol_list = protocol->next; - write_unlock(&protocol_list_lock); + write_unlock_bh(&protocol_list_lock); kfree(protocol); return; } @@ -99,14 +98,14 @@ void ax25_protocol_release(unsigned int pid) if (protocol->next->pid == pid) { s = protocol->next; protocol->next = protocol->next->next; - write_unlock(&protocol_list_lock); + write_unlock_bh(&protocol_list_lock); kfree(s); return; } protocol = protocol->next; } - write_unlock(&protocol_list_lock); + write_unlock_bh(&protocol_list_lock); } EXPORT_SYMBOL(ax25_protocol_release); @@ -267,13 +266,13 @@ int ax25_protocol_is_registered(unsigned int pid) struct protocol_struct *protocol; int res = 0; - read_lock(&protocol_list_lock); + read_lock_bh(&protocol_list_lock); for (protocol = protocol_list; protocol != NULL; protocol = protocol->next) if (protocol->pid == pid) { res = 1; break; } - read_unlock(&protocol_list_lock); + read_unlock_bh(&protocol_list_lock); return res; } diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 4cf87540fb3a..e9d94291581e 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -102,8 +102,8 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb) int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) { int (*func)(struct sk_buff *, ax25_cb *); - volatile int queued = 0; unsigned char pid; + int queued = 0; if (skb == NULL) return 0; diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index a0b534f80f17..136c3aefa9de 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -6,7 +6,6 @@ * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -104,11 +103,13 @@ int ax25_rebuild_header(struct sk_buff *skb) { struct sk_buff *ourskb; unsigned char *bp = skb->data; - struct net_device *dev; + ax25_route *route; + struct net_device *dev = NULL; ax25_address *src, *dst; + ax25_digi *digipeat = NULL; ax25_dev *ax25_dev; - ax25_route _route, *route = &_route; ax25_cb *ax25; + char ip_mode = ' '; dst = (ax25_address *)(bp + 1); src = (ax25_address *)(bp + 8); @@ -116,8 +117,12 @@ int ax25_rebuild_header(struct sk_buff *skb) if (arp_find(bp + 1, skb)) return 1; - route = ax25_rt_find_route(route, dst, NULL); - dev = route->dev; + route = ax25_get_route(dst, NULL); + if (route) { + digipeat = route->digipeat; + dev = route->dev; + ip_mode = route->ip_mode; + }; if (dev == NULL) dev = skb->dev; @@ -127,7 +132,7 @@ int ax25_rebuild_header(struct sk_buff *skb) } if (bp[16] == AX25_P_IP) { - if (route->ip_mode == 'V' || (route->ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) { + if (ip_mode == 'V' || (ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) { /* * We copy the buffer and release the original thereby * keeping it straight @@ -173,7 +178,7 @@ int ax25_rebuild_header(struct sk_buff *skb) ourskb, ax25_dev->values[AX25_VALUES_PACLEN], &src_c, - &dst_c, route->digipeat, dev); + &dst_c, digipeat, dev); if (ax25) { ax25_cb_put(ax25); } @@ -191,7 +196,7 @@ int ax25_rebuild_header(struct sk_buff *skb) skb_pull(skb, AX25_KISS_HEADER_LEN); - if (route->digipeat != NULL) { + if (digipeat != NULL) { if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) { kfree_skb(skb); goto put; @@ -203,7 +208,8 @@ int ax25_rebuild_header(struct sk_buff *skb) ax25_queue_xmit(skb, dev); put: - ax25_put_route(route); + if (route) + ax25_put_route(route); return 1; } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index 5d99852b239c..d7736e585336 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -8,7 +8,6 @@ * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 5ac98250797b..51b7bdaf27eb 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -41,8 +41,6 @@ static ax25_route *ax25_route_list; static DEFINE_RWLOCK(ax25_route_lock); -static ax25_route *ax25_get_route(ax25_address *, struct net_device *); - void ax25_rt_device_down(struct net_device *dev) { ax25_route *s, *t, *ax25_rt; @@ -115,7 +113,7 @@ static int ax25_rt_add(struct ax25_routes_struct *route) return -ENOMEM; } - atomic_set(&ax25_rt->ref, 0); + atomic_set(&ax25_rt->refcount, 1); ax25_rt->callsign = route->dest_addr; ax25_rt->dev = ax25_dev->dev; ax25_rt->digipeat = NULL; @@ -140,23 +138,10 @@ static int ax25_rt_add(struct ax25_routes_struct *route) return 0; } -static void ax25_rt_destroy(ax25_route *ax25_rt) +void __ax25_put_route(ax25_route *ax25_rt) { - if (atomic_read(&ax25_rt->ref) == 0) { - kfree(ax25_rt->digipeat); - kfree(ax25_rt); - return; - } - - /* - * Uh... Route is still in use; we can't yet destroy it. Retry later. - */ - init_timer(&ax25_rt->timer); - ax25_rt->timer.data = (unsigned long) ax25_rt; - ax25_rt->timer.function = (void *) ax25_rt_destroy; - ax25_rt->timer.expires = jiffies + 5 * HZ; - - add_timer(&ax25_rt->timer); + kfree(ax25_rt->digipeat); + kfree(ax25_rt); } static int ax25_rt_del(struct ax25_routes_struct *route) @@ -177,12 +162,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route) ax25cmp(&route->dest_addr, &s->callsign) == 0) { if (ax25_route_list == s) { ax25_route_list = s->next; - ax25_rt_destroy(s); + ax25_put_route(s); } else { for (t = ax25_route_list; t != NULL; t = t->next) { if (t->next == s) { t->next = s->next; - ax25_rt_destroy(s); + ax25_put_route(s); break; } } @@ -362,7 +347,7 @@ struct file_operations ax25_route_fops = { * * Only routes with a reference count of zero can be destroyed. */ -static ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) +ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) { ax25_route *ax25_spe_rt = NULL; ax25_route *ax25_def_rt = NULL; @@ -392,7 +377,7 @@ static ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) ax25_rt = ax25_spe_rt; if (ax25_rt != NULL) - atomic_inc(&ax25_rt->ref); + ax25_hold_route(ax25_rt); read_unlock(&ax25_route_lock); @@ -467,24 +452,6 @@ put: return 0; } -ax25_route *ax25_rt_find_route(ax25_route * route, ax25_address *addr, - struct net_device *dev) -{ - ax25_route *ax25_rt; - - if ((ax25_rt = ax25_get_route(addr, dev))) - return ax25_rt; - - route->next = NULL; - atomic_set(&route->ref, 1); - route->callsign = *addr; - route->dev = dev; - route->digipeat = NULL; - route->ip_mode = ' '; - - return route; -} - struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, ax25_address *dest, ax25_digi *digi) { diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c index ec254057f212..72594867fab6 100644 --- a/net/ax25/ax25_timer.c +++ b/net/ax25/ax25_timer.c @@ -12,7 +12,6 @@ * Copyright (C) Frederic Rible F1OAT (frible@teaser.fr) * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org) */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index bdb64c36df12..369a75b160f2 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -6,7 +6,6 @@ * * Copyright (C) 1996 Mike Shaver (shaver@zeroknowledge.com) */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> #include <linux/spinlock.h> diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 469eda0f0dfd..788ea7a2b744 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -24,7 +24,6 @@ /* Bluetooth address family and sockets. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -49,7 +48,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.8" +#define VERSION "2.10" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 @@ -308,13 +307,21 @@ static struct net_proto_family bt_sock_family_ops = { static int __init bt_init(void) { + int err; + BT_INFO("Core ver %s", VERSION); - sock_register(&bt_sock_family_ops); + err = bt_sysfs_init(); + if (err < 0) + return err; - BT_INFO("HCI device and connection manager initialized"); + err = sock_register(&bt_sock_family_ops); + if (err < 0) { + bt_sysfs_cleanup(); + return err; + } - bt_sysfs_init(); + BT_INFO("HCI device and connection manager initialized"); hci_sock_init(); @@ -325,9 +332,9 @@ static void __exit bt_exit(void) { hci_sock_cleanup(); - bt_sysfs_cleanup(); - sock_unregister(PF_BLUETOOTH); + + bt_sysfs_cleanup(); } subsys_initcall(bt_init); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index d908d49dc9f8..e620061fb50f 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -29,7 +29,6 @@ * $Id: core.c,v 1.20 2002/08/04 21:23:58 maxk Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 921204f95f4a..7f7b27db6a8f 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -29,7 +29,6 @@ * $Id: netdev.c,v 1.8 2002/08/04 21:23:58 maxk Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/socket.h> diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 2bfe796cf05d..28c55835422a 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -28,7 +28,6 @@ * $Id: sock.c,v 1.4 2002/08/04 21:23:58 maxk Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index b2e7e38531c6..be04e9fb11f6 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -76,15 +75,13 @@ static struct cmtp_application *cmtp_application_add(struct cmtp_session *session, __u16 appl) { - struct cmtp_application *app = kmalloc(sizeof(*app), GFP_KERNEL); + struct cmtp_application *app = kzalloc(sizeof(*app), GFP_KERNEL); BT_DBG("session %p application %p appl %d", session, app, appl); if (!app) return NULL; - memset(app, 0, sizeof(*app)); - app->state = BT_OPEN; app->appl = appl; diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 901eff7ebe74..b81a01c64aea 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -336,10 +335,9 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) baswap(&src, &bt_sk(sock->sk)->src); baswap(&dst, &bt_sk(sock->sk)->dst); - session = kmalloc(sizeof(struct cmtp_session), GFP_KERNEL); + session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL); if (!session) return -ENOMEM; - memset(session, 0, sizeof(struct cmtp_session)); down_write(&cmtp_session_sem); diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 8f8fad23f78a..10ad7fd91d83 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f812ed129e58..420ed4d7e57e 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -24,7 +24,6 @@ /* Bluetooth HCI connection handling. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -116,8 +115,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle) static void hci_conn_timeout(unsigned long arg) { - struct hci_conn *conn = (void *)arg; - struct hci_dev *hdev = conn->hdev; + struct hci_conn *conn = (void *) arg; + struct hci_dev *hdev = conn->hdev; BT_DBG("conn %p state %d", conn, conn->state); @@ -133,11 +132,13 @@ static void hci_conn_timeout(unsigned long arg) return; } -static void hci_conn_init_timer(struct hci_conn *conn) +static void hci_conn_idle(unsigned long arg) { - init_timer(&conn->timer); - conn->timer.function = hci_conn_timeout; - conn->timer.data = (unsigned long)conn; + struct hci_conn *conn = (void *) arg; + + BT_DBG("conn %p mode %d", conn, conn->mode); + + hci_conn_enter_sniff_mode(conn); } struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) @@ -146,17 +147,27 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) BT_DBG("%s dst %s", hdev->name, batostr(dst)); - if (!(conn = kmalloc(sizeof(struct hci_conn), GFP_ATOMIC))) + conn = kzalloc(sizeof(struct hci_conn), GFP_ATOMIC); + if (!conn) return NULL; - memset(conn, 0, sizeof(struct hci_conn)); bacpy(&conn->dst, dst); - conn->type = type; conn->hdev = hdev; + conn->type = type; + conn->mode = HCI_CM_ACTIVE; conn->state = BT_OPEN; + conn->power_save = 1; + skb_queue_head_init(&conn->data_q); - hci_conn_init_timer(conn); + + init_timer(&conn->disc_timer); + conn->disc_timer.function = hci_conn_timeout; + conn->disc_timer.data = (unsigned long) conn; + + init_timer(&conn->idle_timer); + conn->idle_timer.function = hci_conn_idle; + conn->idle_timer.data = (unsigned long) conn; atomic_set(&conn->refcnt, 0); @@ -179,7 +190,9 @@ int hci_conn_del(struct hci_conn *conn) BT_DBG("%s conn %p handle %d", hdev->name, conn, conn->handle); - hci_conn_del_timer(conn); + del_timer(&conn->idle_timer); + + del_timer(&conn->disc_timer); if (conn->type == SCO_LINK) { struct hci_conn *acl = conn->link; @@ -365,6 +378,70 @@ int hci_conn_switch_role(struct hci_conn *conn, uint8_t role) } EXPORT_SYMBOL(hci_conn_switch_role); +/* Enter active mode */ +void hci_conn_enter_active_mode(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p mode %d", conn, conn->mode); + + if (test_bit(HCI_RAW, &hdev->flags)) + return; + + if (conn->mode != HCI_CM_SNIFF || !conn->power_save) + goto timer; + + if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { + struct hci_cp_exit_sniff_mode cp; + cp.handle = __cpu_to_le16(conn->handle); + hci_send_cmd(hdev, OGF_LINK_POLICY, + OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp); + } + +timer: + if (hdev->idle_timeout > 0) + mod_timer(&conn->idle_timer, + jiffies + msecs_to_jiffies(hdev->idle_timeout)); +} + +/* Enter sniff mode */ +void hci_conn_enter_sniff_mode(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p mode %d", conn, conn->mode); + + if (test_bit(HCI_RAW, &hdev->flags)) + return; + + if (!lmp_sniff_capable(hdev) || !lmp_sniff_capable(conn)) + return; + + if (conn->mode != HCI_CM_ACTIVE || !(conn->link_policy & HCI_LP_SNIFF)) + return; + + if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) { + struct hci_cp_sniff_subrate cp; + cp.handle = __cpu_to_le16(conn->handle); + cp.max_latency = __constant_cpu_to_le16(0); + cp.min_remote_timeout = __constant_cpu_to_le16(0); + cp.min_local_timeout = __constant_cpu_to_le16(0); + hci_send_cmd(hdev, OGF_LINK_POLICY, + OCF_SNIFF_SUBRATE, sizeof(cp), &cp); + } + + if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { + struct hci_cp_sniff_mode cp; + cp.handle = __cpu_to_le16(conn->handle); + cp.max_interval = __cpu_to_le16(hdev->sniff_max_interval); + cp.min_interval = __cpu_to_le16(hdev->sniff_min_interval); + cp.attempt = __constant_cpu_to_le16(4); + cp.timeout = __constant_cpu_to_le16(1); + hci_send_cmd(hdev, OGF_LINK_POLICY, + OCF_SNIFF_MODE, sizeof(cp), &cp); + } +} + /* Drop all connection on the device */ void hci_conn_hash_flush(struct hci_dev *hdev) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a49a6975092d..5ed474277903 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -24,7 +24,6 @@ /* Bluetooth HCI core. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kmod.h> @@ -337,9 +336,8 @@ void hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data) if (!(e = hci_inquiry_cache_lookup(hdev, &data->bdaddr))) { /* Entry not in the cache. Add new one. */ - if (!(e = kmalloc(sizeof(struct inquiry_entry), GFP_ATOMIC))) + if (!(e = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC))) return; - memset(e, 0, sizeof(struct inquiry_entry)); e->next = cache->list; cache->list = e; } @@ -412,7 +410,7 @@ int hci_inquiry(void __user *arg) } hci_dev_unlock_bh(hdev); - timeo = ir.length * 2 * HZ; + timeo = ir.length * msecs_to_jiffies(2000); if (do_inquiry && (err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo)) < 0) goto done; @@ -480,7 +478,8 @@ int hci_dev_open(__u16 dev) set_bit(HCI_INIT, &hdev->flags); //__hci_request(hdev, hci_reset_req, 0, HZ); - ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT); + ret = __hci_request(hdev, hci_init_req, 0, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); clear_bit(HCI_INIT, &hdev->flags); } @@ -547,7 +546,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) atomic_set(&hdev->cmd_cnt, 1); if (!test_bit(HCI_RAW, &hdev->flags)) { set_bit(HCI_INIT, &hdev->flags); - __hci_request(hdev, hci_reset_req, 0, HZ/4); + __hci_request(hdev, hci_reset_req, 0, + msecs_to_jiffies(250)); clear_bit(HCI_INIT, &hdev->flags); } @@ -620,7 +620,8 @@ int hci_dev_reset(__u16 dev) hdev->acl_cnt = 0; hdev->sco_cnt = 0; if (!test_bit(HCI_RAW, &hdev->flags)) - ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); + ret = __hci_request(hdev, hci_reset_req, 0, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); done: tasklet_enable(&hdev->tx_task); @@ -658,7 +659,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: - err = hci_request(hdev, hci_auth_req, dr.dev_opt, HCI_INIT_TIMEOUT); + err = hci_request(hdev, hci_auth_req, dr.dev_opt, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); break; case HCISETENCRYPT: @@ -669,18 +671,19 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ - err = hci_request(hdev, hci_auth_req, - dr.dev_opt, HCI_INIT_TIMEOUT); + err = hci_request(hdev, hci_auth_req, dr.dev_opt, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); if (err) break; } - err = hci_request(hdev, hci_encrypt_req, - dr.dev_opt, HCI_INIT_TIMEOUT); + err = hci_request(hdev, hci_encrypt_req, dr.dev_opt, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); break; case HCISETSCAN: - err = hci_request(hdev, hci_scan_req, dr.dev_opt, HCI_INIT_TIMEOUT); + err = hci_request(hdev, hci_scan_req, dr.dev_opt, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); break; case HCISETPTYPE: @@ -796,12 +799,10 @@ struct hci_dev *hci_alloc_dev(void) { struct hci_dev *hdev; - hdev = kmalloc(sizeof(struct hci_dev), GFP_KERNEL); + hdev = kzalloc(sizeof(struct hci_dev), GFP_KERNEL); if (!hdev) return NULL; - memset(hdev, 0, sizeof(struct hci_dev)); - skb_queue_head_init(&hdev->driver_init); return hdev; @@ -813,8 +814,8 @@ void hci_free_dev(struct hci_dev *hdev) { skb_queue_purge(&hdev->driver_init); - /* will free via class release */ - class_device_put(&hdev->class_dev); + /* will free via device release */ + put_device(&hdev->dev); } EXPORT_SYMBOL(hci_free_dev); @@ -849,6 +850,10 @@ int hci_register_dev(struct hci_dev *hdev) hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); hdev->link_mode = (HCI_LM_ACCEPT); + hdev->idle_timeout = 0; + hdev->sniff_max_interval = 800; + hdev->sniff_min_interval = 80; + tasklet_init(&hdev->cmd_task, hci_cmd_task,(unsigned long) hdev); tasklet_init(&hdev->rx_task, hci_rx_task, (unsigned long) hdev); tasklet_init(&hdev->tx_task, hci_tx_task, (unsigned long) hdev); @@ -1221,6 +1226,9 @@ static inline void hci_sched_acl(struct hci_dev *hdev) while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, "e))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); + + hci_conn_enter_active_mode(conn); + hci_send_frame(skb); hdev->acl_last_tx = jiffies; @@ -1299,6 +1307,8 @@ static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) if (conn) { register struct hci_proto *hp; + hci_conn_enter_active_mode(conn); + /* Send to upper protocol */ if ((hp = hci_proto[HCI_PROTO_L2CAP]) && hp->recv_acldata) { hp->recv_acldata(conn, skb, flags); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index eb64555d1fb3..3896dabab11d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -24,7 +24,6 @@ /* Bluetooth HCI event handling. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -84,6 +83,8 @@ static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff * { struct hci_conn *conn; struct hci_rp_role_discovery *rd; + struct hci_rp_write_link_policy *lp; + void *sent; BT_DBG("%s ocf 0x%x", hdev->name, ocf); @@ -107,6 +108,27 @@ static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff * hci_dev_unlock(hdev); break; + case OCF_WRITE_LINK_POLICY: + sent = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY); + if (!sent) + break; + + lp = (struct hci_rp_write_link_policy *) skb->data; + + if (lp->status) + break; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(lp->handle)); + if (conn) { + __le16 policy = get_unaligned((__le16 *) (sent + 2)); + conn->link_policy = __le16_to_cpu(policy); + } + + hci_dev_unlock(hdev); + break; + default: BT_DBG("%s: Command complete: ogf LINK_POLICY ocf %x", hdev->name, ocf); @@ -275,7 +297,7 @@ static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb /* Command Complete OGF INFO_PARAM */ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) { - struct hci_rp_read_loc_features *lf; + struct hci_rp_read_local_features *lf; struct hci_rp_read_buffer_size *bs; struct hci_rp_read_bd_addr *ba; @@ -283,7 +305,7 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s switch (ocf) { case OCF_READ_LOCAL_FEATURES: - lf = (struct hci_rp_read_loc_features *) skb->data; + lf = (struct hci_rp_read_local_features *) skb->data; if (lf->status) { BT_DBG("%s READ_LOCAL_FEATURES failed %d", hdev->name, lf->status); @@ -320,9 +342,17 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s } hdev->acl_mtu = __le16_to_cpu(bs->acl_mtu); - hdev->sco_mtu = bs->sco_mtu ? bs->sco_mtu : 64; - hdev->acl_pkts = hdev->acl_cnt = __le16_to_cpu(bs->acl_max_pkt); - hdev->sco_pkts = hdev->sco_cnt = __le16_to_cpu(bs->sco_max_pkt); + hdev->sco_mtu = bs->sco_mtu; + hdev->acl_pkts = __le16_to_cpu(bs->acl_max_pkt); + hdev->sco_pkts = __le16_to_cpu(bs->sco_max_pkt); + + if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) { + hdev->sco_mtu = 64; + hdev->sco_pkts = 8; + } + + hdev->acl_cnt = hdev->acl_pkts; + hdev->sco_cnt = hdev->sco_pkts; BT_DBG("%s mtu: acl %d, sco %d max_pkt: acl %d, sco %d", hdev->name, hdev->acl_mtu, hdev->sco_mtu, hdev->acl_pkts, hdev->sco_pkts); @@ -440,8 +470,46 @@ static void hci_cs_link_policy(struct hci_dev *hdev, __u16 ocf, __u8 status) BT_DBG("%s ocf 0x%x", hdev->name, ocf); switch (ocf) { + case OCF_SNIFF_MODE: + if (status) { + struct hci_conn *conn; + struct hci_cp_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_SNIFF_MODE); + + if (!cp) + break; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); + } + + hci_dev_unlock(hdev); + } + break; + + case OCF_EXIT_SNIFF_MODE: + if (status) { + struct hci_conn *conn; + struct hci_cp_exit_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_EXIT_SNIFF_MODE); + + if (!cp) + break; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); + } + + hci_dev_unlock(hdev); + } + break; + default: - BT_DBG("%s Command status: ogf HOST_POLICY ocf %x", hdev->name, ocf); + BT_DBG("%s Command status: ogf LINK_POLICY ocf %x", hdev->name, ocf); break; } } @@ -623,14 +691,16 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk else cp.role = 0x01; /* Remain slave */ - hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp); + hci_send_cmd(hdev, OGF_LINK_CTL, + OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp); } else { /* Connection rejected */ struct hci_cp_reject_conn_req cp; bacpy(&cp.bdaddr, &ev->bdaddr); cp.reason = 0x0f; - hci_send_cmd(hdev, OGF_LINK_CTL, OCF_REJECT_CONN_REQ, sizeof(cp), &cp); + hci_send_cmd(hdev, OGF_LINK_CTL, + OCF_REJECT_CONN_REQ, sizeof(cp), &cp); } } @@ -638,7 +708,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_complete *ev = (struct hci_ev_conn_complete *) skb->data; - struct hci_conn *conn = NULL; + struct hci_conn *conn; BT_DBG("%s", hdev->name); @@ -660,12 +730,21 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (test_bit(HCI_ENCRYPT, &hdev->flags)) conn->link_mode |= HCI_LM_ENCRYPT; + /* Get remote features */ + if (conn->type == ACL_LINK) { + struct hci_cp_read_remote_features cp; + cp.handle = ev->handle; + hci_send_cmd(hdev, OGF_LINK_CTL, + OCF_READ_REMOTE_FEATURES, sizeof(cp), &cp); + } + /* Set link policy */ if (conn->type == ACL_LINK && hdev->link_policy) { struct hci_cp_write_link_policy cp; cp.handle = ev->handle; cp.policy = __cpu_to_le16(hdev->link_policy); - hci_send_cmd(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY, sizeof(cp), &cp); + hci_send_cmd(hdev, OGF_LINK_POLICY, + OCF_WRITE_LINK_POLICY, sizeof(cp), &cp); } /* Set packet type for incoming connection */ @@ -676,7 +755,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK): __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK); - hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp); + hci_send_cmd(hdev, OGF_LINK_CTL, + OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp); } } else conn->state = BT_CLOSED; @@ -704,8 +784,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_disconn_complete *ev = (struct hci_ev_disconn_complete *) skb->data; - struct hci_conn *conn = NULL; - __u16 handle = __le16_to_cpu(ev->handle); + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -714,7 +793,7 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { conn->state = BT_CLOSED; hci_proto_disconn_ind(conn, ev->reason); @@ -771,7 +850,7 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_role_change *ev = (struct hci_ev_role_change *) skb->data; - struct hci_conn *conn = NULL; + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); @@ -794,18 +873,43 @@ static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb hci_dev_unlock(hdev); } +/* Mode Change */ +static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_mode_change *ev = (struct hci_ev_mode_change *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) { + conn->mode = ev->mode; + conn->interval = __le16_to_cpu(ev->interval); + + if (!test_and_clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) { + if (conn->mode == HCI_CM_ACTIVE) + conn->power_save = 1; + else + conn->power_save = 0; + } + } + + hci_dev_unlock(hdev); +} + /* Authentication Complete */ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_auth_complete *ev = (struct hci_ev_auth_complete *) skb->data; - struct hci_conn *conn = NULL; - __u16 handle = __le16_to_cpu(ev->handle); + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { if (!ev->status) conn->link_mode |= HCI_LM_AUTH; @@ -820,8 +924,7 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s cp.handle = __cpu_to_le16(conn->handle); cp.encrypt = 1; hci_send_cmd(conn->hdev, OGF_LINK_CTL, - OCF_SET_CONN_ENCRYPT, - sizeof(cp), &cp); + OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp); } else { clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend); hci_encrypt_cfm(conn, ev->status, 0x00); @@ -836,14 +939,13 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_encrypt_change *ev = (struct hci_ev_encrypt_change *) skb->data; - struct hci_conn *conn = NULL; - __u16 handle = __le16_to_cpu(ev->handle); + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { if (!ev->status) { if (ev->encrypt) @@ -864,14 +966,13 @@ static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff * static inline void hci_change_conn_link_key_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_change_conn_link_key_complete *ev = (struct hci_ev_change_conn_link_key_complete *) skb->data; - struct hci_conn *conn = NULL; - __u16 handle = __le16_to_cpu(ev->handle); + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { if (!ev->status) conn->link_mode |= HCI_LM_SECURE; @@ -899,18 +1000,35 @@ static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff { } +/* Remote Features */ +static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_remote_features *ev = (struct hci_ev_remote_features *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn && !ev->status) { + memcpy(conn->features, ev->features, sizeof(conn->features)); + } + + hci_dev_unlock(hdev); +} + /* Clock Offset */ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_clock_offset *ev = (struct hci_ev_clock_offset *) skb->data; - struct hci_conn *conn = NULL; - __u16 handle = __le16_to_cpu(ev->handle); + struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn && !ev->status) { struct inquiry_entry *ie; @@ -941,6 +1059,23 @@ static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff * hci_dev_unlock(hdev); } +/* Sniff Subrate */ +static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_sniff_subrate *ev = (struct hci_ev_sniff_subrate *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) { + } + + hci_dev_unlock(hdev); +} + void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data; @@ -989,6 +1124,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_role_change_evt(hdev, skb); break; + case HCI_EV_MODE_CHANGE: + hci_mode_change_evt(hdev, skb); + break; + case HCI_EV_AUTH_COMPLETE: hci_auth_complete_evt(hdev, skb); break; @@ -1013,6 +1152,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_link_key_notify_evt(hdev, skb); break; + case HCI_EV_REMOTE_FEATURES: + hci_remote_features_evt(hdev, skb); + break; + case HCI_EV_CLOCK_OFFSET: hci_clock_offset_evt(hdev, skb); break; @@ -1021,6 +1164,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_pscan_rep_mode_evt(hdev, skb); break; + case HCI_EV_SNIFF_SUBRATE: + hci_sniff_subrate_evt(hdev, skb); + break; + case HCI_EV_CMD_STATUS: cs = (struct hci_ev_cmd_status *) skb->data; skb_pull(skb, sizeof(cs)); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 97bdec73d17e..1a35d343e08a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -24,7 +24,6 @@ /* Bluetooth HCI sockets. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 0ed38740388c..3987d167f04e 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -1,9 +1,10 @@ /* Bluetooth HCI driver model support. */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/platform_device.h> + #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -12,35 +13,35 @@ #define BT_DBG(D...) #endif -static ssize_t show_name(struct class_device *cdev, char *buf) +static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); + struct hci_dev *hdev = dev_get_drvdata(dev); return sprintf(buf, "%s\n", hdev->name); } -static ssize_t show_type(struct class_device *cdev, char *buf) +static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); + struct hci_dev *hdev = dev_get_drvdata(dev); return sprintf(buf, "%d\n", hdev->type); } -static ssize_t show_address(struct class_device *cdev, char *buf) +static ssize_t show_address(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); + struct hci_dev *hdev = dev_get_drvdata(dev); bdaddr_t bdaddr; baswap(&bdaddr, &hdev->bdaddr); return sprintf(buf, "%s\n", batostr(&bdaddr)); } -static ssize_t show_flags(struct class_device *cdev, char *buf) +static ssize_t show_flags(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); + struct hci_dev *hdev = dev_get_drvdata(dev); return sprintf(buf, "0x%lx\n", hdev->flags); } -static ssize_t show_inquiry_cache(struct class_device *cdev, char *buf) +static ssize_t show_inquiry_cache(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); + struct hci_dev *hdev = dev_get_drvdata(dev); struct inquiry_cache *cache = &hdev->inq_cache; struct inquiry_entry *e; int n = 0; @@ -62,94 +63,193 @@ static ssize_t show_inquiry_cache(struct class_device *cdev, char *buf) return n; } -static CLASS_DEVICE_ATTR(name, S_IRUGO, show_name, NULL); -static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL); -static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); -static CLASS_DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL); -static CLASS_DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL); +static ssize_t show_idle_timeout(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->idle_timeout); +} -static struct class_device_attribute *bt_attrs[] = { - &class_device_attr_name, - &class_device_attr_type, - &class_device_attr_address, - &class_device_attr_flags, - &class_device_attr_inquiry_cache, - NULL -}; +static ssize_t store_idle_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + char *ptr; + __u32 val; + + val = simple_strtoul(buf, &ptr, 10); + if (ptr == buf) + return -EINVAL; -#ifdef CONFIG_HOTPLUG -static int bt_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) + if (val != 0 && (val < 500 || val > 3600000)) + return -EINVAL; + + hdev->idle_timeout = val; + + return count; +} + +static ssize_t show_sniff_max_interval(struct device *dev, struct device_attribute *attr, char *buf) { - struct hci_dev *hdev = class_get_devdata(cdev); - int n, i = 0; + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->sniff_max_interval); +} - envp[i++] = buf; - n = snprintf(buf, size, "INTERFACE=%s", hdev->name) + 1; - buf += n; - size -= n; +static ssize_t store_sniff_max_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + char *ptr; + __u16 val; - if ((size <= 0) || (i >= num_envp)) - return -ENOMEM; + val = simple_strtoul(buf, &ptr, 10); + if (ptr == buf) + return -EINVAL; - envp[i] = NULL; - return 0; + if (val < 0x0002 || val > 0xFFFE || val % 2) + return -EINVAL; + + if (val < hdev->sniff_min_interval) + return -EINVAL; + + hdev->sniff_max_interval = val; + + return count; +} + +static ssize_t show_sniff_min_interval(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->sniff_min_interval); } -#endif -static void bt_release(struct class_device *cdev) +static ssize_t store_sniff_min_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct hci_dev *hdev = class_get_devdata(cdev); + struct hci_dev *hdev = dev_get_drvdata(dev); + char *ptr; + __u16 val; - kfree(hdev); + val = simple_strtoul(buf, &ptr, 10); + if (ptr == buf) + return -EINVAL; + + if (val < 0x0002 || val > 0xFFFE || val % 2) + return -EINVAL; + + if (val > hdev->sniff_max_interval) + return -EINVAL; + + hdev->sniff_min_interval = val; + + return count; } -struct class bt_class = { - .name = "bluetooth", - .release = bt_release, -#ifdef CONFIG_HOTPLUG - .uevent = bt_uevent, -#endif +static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); +static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); +static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); +static DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL); +static DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL); + +static DEVICE_ATTR(idle_timeout, S_IRUGO | S_IWUSR, + show_idle_timeout, store_idle_timeout); +static DEVICE_ATTR(sniff_max_interval, S_IRUGO | S_IWUSR, + show_sniff_max_interval, store_sniff_max_interval); +static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR, + show_sniff_min_interval, store_sniff_min_interval); + +static struct device_attribute *bt_attrs[] = { + &dev_attr_name, + &dev_attr_type, + &dev_attr_address, + &dev_attr_flags, + &dev_attr_inquiry_cache, + &dev_attr_idle_timeout, + &dev_attr_sniff_max_interval, + &dev_attr_sniff_min_interval, + NULL }; +struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); +static struct bus_type bt_bus = { + .name = "bluetooth", +}; + +static struct platform_device *bt_platform; + +static void bt_release(struct device *dev) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + kfree(hdev); +} + int hci_register_sysfs(struct hci_dev *hdev) { - struct class_device *cdev = &hdev->class_dev; + struct device *dev = &hdev->dev; unsigned int i; int err; BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); - cdev->class = &bt_class; - class_set_devdata(cdev, hdev); + dev->class = bt_class; + + if (hdev->parent) + dev->parent = hdev->parent; + else + dev->parent = &bt_platform->dev; + + strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE); + + dev->release = bt_release; - strlcpy(cdev->class_id, hdev->name, BUS_ID_SIZE); - err = class_device_register(cdev); + dev_set_drvdata(dev, hdev); + + err = device_register(dev); if (err < 0) return err; for (i = 0; bt_attrs[i]; i++) - class_device_create_file(cdev, bt_attrs[i]); + device_create_file(dev, bt_attrs[i]); return 0; } void hci_unregister_sysfs(struct hci_dev *hdev) { - struct class_device * cdev = &hdev->class_dev; + struct device *dev = &hdev->dev; BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); - class_device_del(cdev); + device_del(dev); } int __init bt_sysfs_init(void) { - return class_register(&bt_class); + int err; + + bt_platform = platform_device_register_simple("bluetooth", -1, NULL, 0); + if (IS_ERR(bt_platform)) + return PTR_ERR(bt_platform); + + err = bus_register(&bt_bus); + if (err < 0) { + platform_device_unregister(bt_platform); + return err; + } + + bt_class = class_create(THIS_MODULE, "bluetooth"); + if (IS_ERR(bt_class)) { + bus_unregister(&bt_bus); + platform_device_unregister(bt_platform); + return PTR_ERR(bt_class); + } + + return 0; } void __exit bt_sysfs_cleanup(void) { - class_unregister(&bt_class); + class_destroy(bt_class); + + bus_unregister(&bt_bus); + + platform_device_unregister(bt_platform); } diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig index edfea772fb67..c6abf2a5a932 100644 --- a/net/bluetooth/hidp/Kconfig +++ b/net/bluetooth/hidp/Kconfig @@ -1,7 +1,6 @@ config BT_HIDP tristate "HIDP protocol support" - depends on BT && BT_L2CAP && (BROKEN || !S390) - select INPUT + depends on BT && BT_L2CAP && INPUT help HIDP (Human Interface Device Protocol) is a transport layer for HID reports. HIDP is required for the Bluetooth Human diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index cdb9cfafd960..c6e3a2c27c6e 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -583,10 +582,9 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst)) return -ENOTUNIQ; - session = kmalloc(sizeof(struct hidp_session), GFP_KERNEL); + session = kzalloc(sizeof(struct hidp_session), GFP_KERNEL); if (!session) return -ENOMEM; - memset(session, 0, sizeof(struct hidp_session)); session->input = input_allocate_device(); if (!session->input) { diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index b8f67761b886..099646e4e2ef 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -20,7 +20,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index f6b4a8085357..d56f60b392ac 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -24,7 +24,6 @@ /* Bluetooth L2CAP core and sockets. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -64,11 +63,6 @@ static struct bt_sock_list l2cap_sk_list = { .lock = RW_LOCK_UNLOCKED }; -static int l2cap_conn_del(struct hci_conn *conn, int err); - -static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent); -static void l2cap_chan_del(struct sock *sk, int err); - static void __l2cap_sock_close(struct sock *sk, int reason); static void l2cap_sock_close(struct sock *sk); static void l2cap_sock_kill(struct sock *sk); @@ -110,24 +104,177 @@ static void l2cap_sock_init_timer(struct sock *sk) sk->sk_timer.data = (unsigned long)sk; } +/* ---- L2CAP channels ---- */ +static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) +{ + struct sock *s; + for (s = l->head; s; s = l2cap_pi(s)->next_c) { + if (l2cap_pi(s)->dcid == cid) + break; + } + return s; +} + +static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) +{ + struct sock *s; + for (s = l->head; s; s = l2cap_pi(s)->next_c) { + if (l2cap_pi(s)->scid == cid) + break; + } + return s; +} + +/* Find channel with given SCID. + * Returns locked socket */ +static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) +{ + struct sock *s; + read_lock(&l->lock); + s = __l2cap_get_chan_by_scid(l, cid); + if (s) bh_lock_sock(s); + read_unlock(&l->lock); + return s; +} + +static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) +{ + struct sock *s; + for (s = l->head; s; s = l2cap_pi(s)->next_c) { + if (l2cap_pi(s)->ident == ident) + break; + } + return s; +} + +static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) +{ + struct sock *s; + read_lock(&l->lock); + s = __l2cap_get_chan_by_ident(l, ident); + if (s) bh_lock_sock(s); + read_unlock(&l->lock); + return s; +} + +static u16 l2cap_alloc_cid(struct l2cap_chan_list *l) +{ + u16 cid = 0x0040; + + for (; cid < 0xffff; cid++) { + if(!__l2cap_get_chan_by_scid(l, cid)) + return cid; + } + + return 0; +} + +static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk) +{ + sock_hold(sk); + + if (l->head) + l2cap_pi(l->head)->prev_c = sk; + + l2cap_pi(sk)->next_c = l->head; + l2cap_pi(sk)->prev_c = NULL; + l->head = sk; +} + +static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk) +{ + struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c; + + write_lock_bh(&l->lock); + if (sk == l->head) + l->head = next; + + if (next) + l2cap_pi(next)->prev_c = prev; + if (prev) + l2cap_pi(prev)->next_c = next; + write_unlock_bh(&l->lock); + + __sock_put(sk); +} + +static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent) +{ + struct l2cap_chan_list *l = &conn->chan_list; + + BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid); + + l2cap_pi(sk)->conn = conn; + + if (sk->sk_type == SOCK_SEQPACKET) { + /* Alloc CID for connection-oriented socket */ + l2cap_pi(sk)->scid = l2cap_alloc_cid(l); + } else if (sk->sk_type == SOCK_DGRAM) { + /* Connectionless socket */ + l2cap_pi(sk)->scid = 0x0002; + l2cap_pi(sk)->dcid = 0x0002; + l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; + } else { + /* Raw socket can send/recv signalling messages only */ + l2cap_pi(sk)->scid = 0x0001; + l2cap_pi(sk)->dcid = 0x0001; + l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; + } + + __l2cap_chan_link(l, sk); + + if (parent) + bt_accept_enqueue(parent, sk); +} + +/* Delete channel. + * Must be called on the locked socket. */ +static void l2cap_chan_del(struct sock *sk, int err) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sock *parent = bt_sk(sk)->parent; + + l2cap_sock_clear_timer(sk); + + BT_DBG("sk %p, conn %p, err %d", sk, conn, err); + + if (conn) { + /* Unlink from channel list */ + l2cap_chan_unlink(&conn->chan_list, sk); + l2cap_pi(sk)->conn = NULL; + hci_conn_put(conn->hcon); + } + + sk->sk_state = BT_CLOSED; + sock_set_flag(sk, SOCK_ZAPPED); + + if (err) + sk->sk_err = err; + + if (parent) { + bt_accept_unlink(sk); + parent->sk_data_ready(parent, 0); + } else + sk->sk_state_change(sk); +} + /* ---- L2CAP connections ---- */ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) { - struct l2cap_conn *conn; - - if ((conn = hcon->l2cap_data)) - return conn; + struct l2cap_conn *conn = hcon->l2cap_data; - if (status) + if (conn || status) return conn; - if (!(conn = kmalloc(sizeof(struct l2cap_conn), GFP_ATOMIC))) + conn = kzalloc(sizeof(struct l2cap_conn), GFP_ATOMIC); + if (!conn) return NULL; - memset(conn, 0, sizeof(struct l2cap_conn)); hcon->l2cap_data = conn; conn->hcon = hcon; + BT_DBG("hcon %p conn %p", hcon, conn); + conn->mtu = hcon->hdev->acl_mtu; conn->src = &hcon->hdev->bdaddr; conn->dst = &hcon->dst; @@ -135,17 +282,16 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); - BT_DBG("hcon %p conn %p", hcon, conn); return conn; } -static int l2cap_conn_del(struct hci_conn *hcon, int err) +static void l2cap_conn_del(struct hci_conn *hcon, int err) { - struct l2cap_conn *conn; + struct l2cap_conn *conn = hcon->l2cap_data; struct sock *sk; - if (!(conn = hcon->l2cap_data)) - return 0; + if (!conn) + return; BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); @@ -162,15 +308,14 @@ static int l2cap_conn_del(struct hci_conn *hcon, int err) hcon->l2cap_data = NULL; kfree(conn); - return 0; } static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent) { struct l2cap_chan_list *l = &conn->chan_list; - write_lock(&l->lock); + write_lock_bh(&l->lock); __l2cap_chan_add(conn, sk, parent); - write_unlock(&l->lock); + write_unlock_bh(&l->lock); } static inline u8 l2cap_get_ident(struct l2cap_conn *conn) @@ -183,14 +328,14 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn) * 200 - 254 are used by utilities like l2ping, etc. */ - spin_lock(&conn->lock); + spin_lock_bh(&conn->lock); if (++conn->tx_ident > 128) conn->tx_ident = 1; id = conn->tx_ident; - spin_unlock(&conn->lock); + spin_unlock_bh(&conn->lock); return id; } @@ -926,160 +1071,6 @@ static int l2cap_sock_release(struct socket *sock) return err; } -/* ---- L2CAP channels ---- */ -static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) -{ - struct sock *s; - for (s = l->head; s; s = l2cap_pi(s)->next_c) { - if (l2cap_pi(s)->dcid == cid) - break; - } - return s; -} - -static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) -{ - struct sock *s; - for (s = l->head; s; s = l2cap_pi(s)->next_c) { - if (l2cap_pi(s)->scid == cid) - break; - } - return s; -} - -/* Find channel with given SCID. - * Returns locked socket */ -static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) -{ - struct sock *s; - read_lock(&l->lock); - s = __l2cap_get_chan_by_scid(l, cid); - if (s) bh_lock_sock(s); - read_unlock(&l->lock); - return s; -} - -static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) -{ - struct sock *s; - for (s = l->head; s; s = l2cap_pi(s)->next_c) { - if (l2cap_pi(s)->ident == ident) - break; - } - return s; -} - -static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) -{ - struct sock *s; - read_lock(&l->lock); - s = __l2cap_get_chan_by_ident(l, ident); - if (s) bh_lock_sock(s); - read_unlock(&l->lock); - return s; -} - -static u16 l2cap_alloc_cid(struct l2cap_chan_list *l) -{ - u16 cid = 0x0040; - - for (; cid < 0xffff; cid++) { - if(!__l2cap_get_chan_by_scid(l, cid)) - return cid; - } - - return 0; -} - -static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk) -{ - sock_hold(sk); - - if (l->head) - l2cap_pi(l->head)->prev_c = sk; - - l2cap_pi(sk)->next_c = l->head; - l2cap_pi(sk)->prev_c = NULL; - l->head = sk; -} - -static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk) -{ - struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c; - - write_lock(&l->lock); - if (sk == l->head) - l->head = next; - - if (next) - l2cap_pi(next)->prev_c = prev; - if (prev) - l2cap_pi(prev)->next_c = next; - write_unlock(&l->lock); - - __sock_put(sk); -} - -static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent) -{ - struct l2cap_chan_list *l = &conn->chan_list; - - BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid); - - l2cap_pi(sk)->conn = conn; - - if (sk->sk_type == SOCK_SEQPACKET) { - /* Alloc CID for connection-oriented socket */ - l2cap_pi(sk)->scid = l2cap_alloc_cid(l); - } else if (sk->sk_type == SOCK_DGRAM) { - /* Connectionless socket */ - l2cap_pi(sk)->scid = 0x0002; - l2cap_pi(sk)->dcid = 0x0002; - l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; - } else { - /* Raw socket can send/recv signalling messages only */ - l2cap_pi(sk)->scid = 0x0001; - l2cap_pi(sk)->dcid = 0x0001; - l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; - } - - __l2cap_chan_link(l, sk); - - if (parent) - bt_accept_enqueue(parent, sk); -} - -/* Delete channel. - * Must be called on the locked socket. */ -static void l2cap_chan_del(struct sock *sk, int err) -{ - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct sock *parent = bt_sk(sk)->parent; - - l2cap_sock_clear_timer(sk); - - BT_DBG("sk %p, conn %p, err %d", sk, conn, err); - - if (conn) { - /* Unlink from channel list */ - l2cap_chan_unlink(&conn->chan_list, sk); - l2cap_pi(sk)->conn = NULL; - hci_conn_put(conn->hcon); - } - - sk->sk_state = BT_CLOSED; - sock_set_flag(sk, SOCK_ZAPPED); - - if (err) - sk->sk_err = err; - - if (parent) { - bt_accept_unlink(sk); - parent->sk_data_ready(parent, 0); - } else - sk->sk_state_change(sk); -} - static void l2cap_conn_ready(struct l2cap_conn *conn) { struct l2cap_chan_list *l = &conn->chan_list; @@ -1425,11 +1416,11 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd if (!sk) goto response; - write_lock(&list->lock); + write_lock_bh(&list->lock); /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(list, scid)) { - write_unlock(&list->lock); + write_unlock_bh(&list->lock); sock_set_flag(sk, SOCK_ZAPPED); l2cap_sock_kill(sk); goto response; @@ -1467,7 +1458,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd result = status = 0; done: - write_unlock(&list->lock); + write_unlock_bh(&list->lock); response: bh_unlock_sock(parent); @@ -1835,7 +1826,9 @@ drop: kfree_skb(skb); done: - if (sk) bh_unlock_sock(sk); + if (sk) + bh_unlock_sock(sk); + return 0; } @@ -1926,18 +1919,18 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { + struct l2cap_conn *conn; + BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); if (hcon->type != ACL_LINK) return 0; if (!status) { - struct l2cap_conn *conn; - conn = l2cap_conn_add(hcon, status); if (conn) l2cap_conn_ready(conn); - } else + } else l2cap_conn_del(hcon, bt_err(status)); return 0; @@ -1951,19 +1944,21 @@ static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) return 0; l2cap_conn_del(hcon, bt_err(reason)); + return 0; } static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) { struct l2cap_chan_list *l; - struct l2cap_conn *conn; + struct l2cap_conn *conn = conn = hcon->l2cap_data; struct l2cap_conn_rsp rsp; struct sock *sk; int result; - if (!(conn = hcon->l2cap_data)) + if (!conn) return 0; + l = &conn->chan_list; BT_DBG("conn %p", conn); @@ -2006,13 +2001,14 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status) { struct l2cap_chan_list *l; - struct l2cap_conn *conn; + struct l2cap_conn *conn = hcon->l2cap_data; struct l2cap_conn_rsp rsp; struct sock *sk; int result; - if (!(conn = hcon->l2cap_data)) + if (!conn) return 0; + l = &conn->chan_list; BT_DBG("conn %p", conn); @@ -2220,7 +2216,7 @@ static int __init l2cap_init(void) goto error; } - class_create_file(&bt_class, &class_attr_l2cap); + class_create_file(bt_class, &class_attr_l2cap); BT_INFO("L2CAP ver %s", VERSION); BT_INFO("L2CAP socket layer initialized"); @@ -2234,7 +2230,7 @@ error: static void __exit l2cap_exit(void) { - class_remove_file(&bt_class, &class_attr_l2cap); + class_remove_file(bt_class, &class_attr_l2cap); if (bt_sock_unregister(BTPROTO_L2CAP) < 0) BT_ERR("L2CAP socket unregistration failed"); diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index ee6a66979913..e5fd0cb70ae9 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -24,7 +24,6 @@ /* Bluetooth kernel library. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index e99010ce8bb2..77eab8f4c7fd 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -27,7 +27,6 @@ * $Id: core.c,v 1.42 2002/10/01 23:26:25 maxk Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -53,8 +52,9 @@ #define BT_DBG(D...) #endif -#define VERSION "1.7" +#define VERSION "1.8" +static int disable_cfc = 0; static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; static struct task_struct *rfcomm_thread; @@ -273,10 +273,10 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d) struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) { - struct rfcomm_dlc *d = kmalloc(sizeof(*d), prio); + struct rfcomm_dlc *d = kzalloc(sizeof(*d), prio); + if (!d) return NULL; - memset(d, 0, sizeof(*d)); init_timer(&d->timer); d->timer.function = rfcomm_dlc_timeout; @@ -289,6 +289,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) rfcomm_dlc_clear_state(d); BT_DBG("%p", d); + return d; } @@ -522,10 +523,10 @@ int rfcomm_dlc_get_modem_status(struct rfcomm_dlc *d, u8 *v24_sig) /* ---- RFCOMM sessions ---- */ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state) { - struct rfcomm_session *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct rfcomm_session *s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) return NULL; - memset(s, 0, sizeof(*s)); BT_DBG("session %p sock %p", s, sock); @@ -534,7 +535,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state) s->sock = sock; s->mtu = RFCOMM_DEFAULT_MTU; - s->cfc = RFCOMM_CFC_UNKNOWN; + s->cfc = disable_cfc ? RFCOMM_CFC_DISABLED : RFCOMM_CFC_UNKNOWN; /* Do not increment module usage count for listening sessions. * Otherwise we won't be able to unload the module. */ @@ -1150,6 +1151,8 @@ static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) static void rfcomm_dlc_accept(struct rfcomm_dlc *d) { + struct sock *sk = d->session->sock->sk; + BT_DBG("dlc %p", d); rfcomm_send_ua(d->session, d->dlci); @@ -1159,6 +1162,9 @@ static void rfcomm_dlc_accept(struct rfcomm_dlc *d) d->state_change(d, 0); rfcomm_dlc_unlock(d); + if (d->link_mode & RFCOMM_LM_MASTER) + hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00); + rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig); } @@ -1223,14 +1229,18 @@ static int rfcomm_apply_pn(struct rfcomm_dlc *d, int cr, struct rfcomm_pn *pn) BT_DBG("dlc %p state %ld dlci %d mtu %d fc 0x%x credits %d", d, d->state, d->dlci, pn->mtu, pn->flow_ctrl, pn->credits); - if (pn->flow_ctrl == 0xf0 || pn->flow_ctrl == 0xe0) { - d->cfc = s->cfc = RFCOMM_CFC_ENABLED; + if ((pn->flow_ctrl == 0xf0 && s->cfc != RFCOMM_CFC_DISABLED) || + pn->flow_ctrl == 0xe0) { + d->cfc = RFCOMM_CFC_ENABLED; d->tx_credits = pn->credits; } else { - d->cfc = s->cfc = RFCOMM_CFC_DISABLED; + d->cfc = RFCOMM_CFC_DISABLED; set_bit(RFCOMM_TX_THROTTLED, &d->flags); } + if (s->cfc == RFCOMM_CFC_UNKNOWN) + s->cfc = d->cfc; + d->priority = pn->priority; d->mtu = s->mtu = btohs(pn->mtu); @@ -2036,7 +2046,7 @@ static int __init rfcomm_init(void) kernel_thread(rfcomm_run, NULL, CLONE_KERNEL); - class_create_file(&bt_class, &class_attr_rfcomm_dlc); + class_create_file(bt_class, &class_attr_rfcomm_dlc); rfcomm_init_sockets(); @@ -2051,7 +2061,7 @@ static int __init rfcomm_init(void) static void __exit rfcomm_exit(void) { - class_remove_file(&bt_class, &class_attr_rfcomm_dlc); + class_remove_file(bt_class, &class_attr_rfcomm_dlc); hci_unregister_cb(&rfcomm_cb); @@ -2074,6 +2084,9 @@ static void __exit rfcomm_exit(void) module_init(rfcomm_init); module_exit(rfcomm_exit); +module_param(disable_cfc, bool, 0644); +MODULE_PARM_DESC(disable_cfc, "Disable credit based flow control"); + module_param(l2cap_mtu, uint, 0644); MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection"); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 757d2dd3b02f..220fee04e7f2 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -27,7 +27,6 @@ * $Id: sock.c,v 1.24 2002/10/03 01:00:34 maxk Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -945,7 +944,7 @@ int __init rfcomm_init_sockets(void) if (err < 0) goto error; - class_create_file(&bt_class, &class_attr_rfcomm); + class_create_file(bt_class, &class_attr_rfcomm); BT_INFO("RFCOMM socket layer initialized"); @@ -959,7 +958,7 @@ error: void __exit rfcomm_cleanup_sockets(void) { - class_remove_file(&bt_class, &class_attr_rfcomm); + class_remove_file(bt_class, &class_attr_rfcomm); if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) BT_ERR("RFCOMM socket layer unregistration failed"); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 74368f79ee5d..bd8d671a0ba6 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -27,7 +27,6 @@ * $Id: tty.c,v 1.24 2002/10/03 01:54:38 holtmann Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/tty.h> @@ -170,10 +169,9 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc) BT_DBG("id %d channel %d", req->dev_id, req->channel); - dev = kmalloc(sizeof(struct rfcomm_dev), GFP_KERNEL); + dev = kzalloc(sizeof(struct rfcomm_dev), GFP_KERNEL); if (!dev) return -ENOMEM; - memset(dev, 0, sizeof(struct rfcomm_dev)); write_lock_bh(&rfcomm_dev_lock); @@ -480,12 +478,8 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len); - if (test_bit(TTY_DONT_FLIP, &tty->flags)) { - tty_buffer_request_room(tty, skb->len); - tty_insert_flip_string(tty, skb->data, skb->len); - tty_flip_buffer_push(tty); - } else - tty->ldisc.receive_buf(tty, skb->data, NULL, skb->len); + tty_insert_flip_string(tty, skb->data, skb->len); + tty_flip_buffer_push(tty); kfree_skb(skb); } @@ -1025,13 +1019,12 @@ int rfcomm_init_ttys(void) rfcomm_tty_driver->owner = THIS_MODULE; rfcomm_tty_driver->driver_name = "rfcomm"; - rfcomm_tty_driver->devfs_name = "bluetooth/rfcomm/"; rfcomm_tty_driver->name = "rfcomm"; rfcomm_tty_driver->major = RFCOMM_TTY_MAJOR; rfcomm_tty_driver->minor_start = RFCOMM_TTY_MINOR; rfcomm_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL; - rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_NO_DEVFS; + rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; rfcomm_tty_driver->init_termios = tty_std_termios; rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; tty_set_operations(rfcomm_tty_driver, &rfcomm_ops); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 0c2d13ad69bb..7714a2ec3854 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -24,7 +24,6 @@ /* Bluetooth SCO sockets. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -109,17 +108,14 @@ static void sco_sock_init_timer(struct sock *sk) static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status) { struct hci_dev *hdev = hcon->hdev; - struct sco_conn *conn; - - if ((conn = hcon->sco_data)) - return conn; + struct sco_conn *conn = hcon->sco_data; - if (status) + if (conn || status) return conn; - if (!(conn = kmalloc(sizeof(struct sco_conn), GFP_ATOMIC))) + conn = kzalloc(sizeof(struct sco_conn), GFP_ATOMIC); + if (!conn) return NULL; - memset(conn, 0, sizeof(struct sco_conn)); spin_lock_init(&conn->lock); @@ -135,6 +131,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon, __u8 status) conn->mtu = 60; BT_DBG("hcon %p conn %p", hcon, conn); + return conn; } @@ -970,7 +967,7 @@ static int __init sco_init(void) goto error; } - class_create_file(&bt_class, &class_attr_sco); + class_create_file(bt_class, &class_attr_sco); BT_INFO("SCO (Voice Link) ver %s", VERSION); BT_INFO("SCO socket layer initialized"); @@ -984,7 +981,7 @@ error: static void __exit sco_exit(void) { - class_remove_file(&bt_class, &class_attr_sco); + class_remove_file(bt_class, &class_attr_sco); if (bt_sock_unregister(BTPROTO_SCO) < 0) BT_ERR("SCO socket unregistration failed"); diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 59556e40e143..f444c12cde5a 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \ - br_stp_if.o br_stp_timer.o + br_stp_if.o br_stp_timer.o br_netlink.o bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o diff --git a/net/bridge/br.c b/net/bridge/br.c index 12da21afb9ca..2994387999a8 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -13,7 +13,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> @@ -30,36 +29,46 @@ static struct llc_sap *br_stp_sap; static int __init br_init(void) { + int err; + br_stp_sap = llc_sap_open(LLC_SAP_BSPAN, br_stp_rcv); if (!br_stp_sap) { printk(KERN_ERR "bridge: can't register sap for STP\n"); - return -EBUSY; + return -EADDRINUSE; } br_fdb_init(); -#ifdef CONFIG_BRIDGE_NETFILTER - if (br_netfilter_init()) - return 1; -#endif + err = br_netfilter_init(); + if (err) + goto err_out1; + + err = register_netdevice_notifier(&br_device_notifier); + if (err) + goto err_out2; + + br_netlink_init(); brioctl_set(br_ioctl_deviceless_stub); br_handle_frame_hook = br_handle_frame; br_fdb_get_hook = br_fdb_get; br_fdb_put_hook = br_fdb_put; - register_netdevice_notifier(&br_device_notifier); - return 0; + +err_out2: + br_netfilter_fini(); +err_out1: + llc_sap_put(br_stp_sap); + return err; } static void __exit br_deinit(void) { rcu_assign_pointer(br_stp_sap->rcv_func, NULL); -#ifdef CONFIG_BRIDGE_NETFILTER + br_netlink_fini(); br_netfilter_fini(); -#endif unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 0c88a2ac32c1..f8dbcee80eba 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -145,9 +145,9 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) struct net_bridge *br = netdev_priv(dev); if (data) - br->feature_mask |= NETIF_F_IP_CSUM; + br->feature_mask |= NETIF_F_NO_CSUM; else - br->feature_mask &= ~NETIF_F_IP_CSUM; + br->feature_mask &= ~NETIF_F_ALL_CSUM; br_features_recompute(br); return 0; @@ -184,6 +184,6 @@ void br_dev_setup(struct net_device *dev) dev->set_mac_address = br_set_mac_address; dev->priv_flags = IFF_EBRIDGE; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST - | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM; + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | + NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 56f3aa47e758..6ccd32b30809 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -20,14 +20,11 @@ #include <linux/netfilter_bridge.h> #include "br_private.h" +/* Don't forward packets to originating port or forwarding diasabled */ static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { - if (skb->dev == p->dev || - p->state != BR_STATE_FORWARDING) - return 0; - - return 1; + return (skb->dev != p->dev && p->state == BR_STATE_FORWARDING); } static inline unsigned packet_length(const struct sk_buff *skb) @@ -37,8 +34,8 @@ static inline unsigned packet_length(const struct sk_buff *skb) int br_dev_queue_push_xmit(struct sk_buff *skb) { - /* drop mtu oversized packets except tso */ - if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->tso_size) + /* drop mtu oversized packets except gso */ + if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { #ifdef CONFIG_BRIDGE_NETFILTER @@ -55,10 +52,9 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) int br_forward_finish(struct sk_buff *skb) { - NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, - br_dev_queue_push_xmit); + return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, + br_dev_queue_push_xmit); - return 0; } static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ad1c7af65ec8..f55ef682ef84 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -300,25 +300,20 @@ int br_add_bridge(const char *name) rtnl_lock(); if (strchr(dev->name, '%')) { ret = dev_alloc_name(dev, dev->name); - if (ret < 0) - goto err1; + if (ret < 0) { + free_netdev(dev); + goto out; + } } ret = register_netdevice(dev); if (ret) - goto err2; + goto out; ret = br_sysfs_addbr(dev); if (ret) - goto err3; - rtnl_unlock(); - return 0; - - err3: - unregister_netdev(dev); - err2: - free_netdev(dev); - err1: + unregister_netdevice(dev); + out: rtnl_unlock(); return ret; } @@ -377,17 +372,28 @@ void br_features_recompute(struct net_bridge *br) struct net_bridge_port *p; unsigned long features, checksum; - features = br->feature_mask &~ NETIF_F_IP_CSUM; - checksum = br->feature_mask & NETIF_F_IP_CSUM; + checksum = br->feature_mask & NETIF_F_ALL_CSUM ? NETIF_F_NO_CSUM : 0; + features = br->feature_mask & ~NETIF_F_ALL_CSUM; list_for_each_entry(p, &br->port_list, list) { - if (!(p->dev->features - & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM))) + unsigned long feature = p->dev->features; + + if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM)) + checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; + if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM)) + checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM; + if (!(feature & NETIF_F_IP_CSUM)) checksum = 0; - features &= p->dev->features; + + if (feature & NETIF_F_GSO) + feature |= NETIF_F_TSO; + feature |= NETIF_F_GSO; + + features &= feature; } - br->dev->features = features | checksum | NETIF_F_LLTX; + br->dev->features = features | checksum | NETIF_F_LLTX | + NETIF_F_GSO_ROBUST; } /* called with RTNL */ diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3da9264449f7..cbc8a389a0a8 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -407,12 +407,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + sizeof(struct ipv6hdr) > skb->len) goto inhdr_error; - if (pkt_len + sizeof(struct ipv6hdr) < skb->len) { - if (__pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr))) - goto inhdr_error; - if (skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_NONE; - } + if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) + goto inhdr_error; } if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) goto inhdr_error; @@ -495,11 +491,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, if (skb->len < len || len < 4 * iph->ihl) goto inhdr_error; - if (skb->len > len) { - __pskb_trim(skb, len); - if (skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_NONE; - } + pskb_trim_rcsum(skb, len); nf_bridge_put(skb->nf_bridge); if (!nf_bridge_alloc(skb)) @@ -769,7 +761,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP) && skb->len > skb->dev->mtu && - !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) + !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); else return br_dev_queue_push_xmit(skb); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c new file mode 100644 index 000000000000..06abb6634f5b --- /dev/null +++ b/net/bridge/br_netlink.c @@ -0,0 +1,200 @@ +/* + * Bridge netlink control interface + * + * Authors: + * Stephen Hemminger <shemminger@osdl.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/rtnetlink.h> +#include "br_private.h" + +/* + * Create one netlink message for one interface + * Contains port and master info as well as carrier and bridge state. + */ +static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags) +{ + const struct net_bridge *br = port->br; + const struct net_device *dev = port->dev; + struct ifinfomsg *r; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + u32 mtu = dev->mtu; + u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + u8 portstate = port->state; + + pr_debug("br_fill_info event %d port %s master %s\n", + event, dev->name, br->dev->name); + + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); + r = NLMSG_DATA(nlh); + r->ifi_family = AF_BRIDGE; + r->__ifi_pad = 0; + r->ifi_type = dev->type; + r->ifi_index = dev->ifindex; + r->ifi_flags = dev_get_flags(dev); + r->ifi_change = 0; + + RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); + + RTA_PUT(skb, IFLA_MASTER, sizeof(int), &br->dev->ifindex); + + if (dev->addr_len) + RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + + RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu); + if (dev->ifindex != dev->iflink) + RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink); + + + RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate); + + if (event == RTM_NEWLINK) + RTA_PUT(skb, IFLA_PROTINFO, sizeof(portstate), &portstate); + + nlh->nlmsg_len = skb->tail - b; + + return skb->len; + +nlmsg_failure: +rtattr_failure: + + skb_trim(skb, b - skb->data); + return -EINVAL; +} + +/* + * Notify listeners of a change in port information + */ +void br_ifinfo_notify(int event, struct net_bridge_port *port) +{ + struct sk_buff *skb; + int err = -ENOMEM; + + pr_debug("bridge notify event=%d\n", event); + skb = alloc_skb(NLMSG_SPACE(sizeof(struct ifinfomsg) + 128), + GFP_ATOMIC); + if (!skb) + goto err_out; + + err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0); + if (err) + goto err_kfree; + + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); + return; + +err_kfree: + kfree_skb(skb); +err_out: + netlink_set_err(rtnl, 0, RTNLGRP_LINK, err); +} + +/* + * Dump information about all ports, in response to GETLINK + */ +static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net_device *dev; + int idx; + int s_idx = cb->args[0]; + int err = 0; + + read_lock(&dev_base_lock); + for (dev = dev_base, idx = 0; dev; dev = dev->next) { + struct net_bridge_port *p = dev->br_port; + + /* not a bridge port */ + if (!p) + continue; + + if (idx < s_idx) + goto cont; + + err = br_fill_ifinfo(skb, p, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); + if (err <= 0) + break; +cont: + ++idx; + } + read_unlock(&dev_base_lock); + + cb->args[0] = idx; + + return skb->len; +} + +/* + * Change state of port (ie from forwarding to blocking etc) + * Used by spanning tree in user space. + */ +static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct rtattr **rta = arg; + struct ifinfomsg *ifm = NLMSG_DATA(nlh); + struct net_device *dev; + struct net_bridge_port *p; + u8 new_state; + + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + /* Must pass valid state as PROTINFO */ + if (rta[IFLA_PROTINFO-1]) { + u8 *pstate = RTA_DATA(rta[IFLA_PROTINFO-1]); + new_state = *pstate; + } else + return -EINVAL; + + if (new_state > BR_STATE_BLOCKING) + return -EINVAL; + + /* Find bridge port */ + dev = __dev_get_by_index(ifm->ifi_index); + if (!dev) + return -ENODEV; + + p = dev->br_port; + if (!p) + return -EINVAL; + + /* if kernel STP is running, don't allow changes */ + if (p->br->stp_enabled) + return -EBUSY; + + if (!netif_running(dev)) + return -ENETDOWN; + + if (!netif_carrier_ok(dev) && new_state != BR_STATE_DISABLED) + return -ENETDOWN; + + p->state = new_state; + br_log_state(p); + return 0; +} + + +static struct rtnetlink_link bridge_rtnetlink_table[RTM_NR_MSGTYPES] = { + [RTM_GETLINK - RTM_BASE] = { .dumpit = br_dump_ifinfo, }, + [RTM_SETLINK - RTM_BASE] = { .doit = br_rtm_setlink, }, +}; + +void __init br_netlink_init(void) +{ + rtnetlink_links[PF_BRIDGE] = bridge_rtnetlink_table; +} + +void __exit br_netlink_fini(void) +{ + rtnetlink_links[PF_BRIDGE] = NULL; +} + diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index a43a9c1d50d7..20278494e4da 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -14,6 +14,7 @@ */ #include <linux/kernel.h> +#include <linux/rtnetlink.h> #include "br_private.h" @@ -49,6 +50,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v case NETDEV_CHANGEADDR: br_fdb_changeaddr(p, dev->dev_addr); + br_ifinfo_notify(RTM_NEWLINK, p); br_stp_recalculate_bridge_id(br); break; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 86ecea7ed372..c491fb2f280e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -29,7 +29,7 @@ #define BR_PORT_DEBOUNCE (HZ/10) -#define BR_VERSION "2.1" +#define BR_VERSION "2.2" typedef struct bridge_id bridge_id; typedef struct mac_addr mac_addr; @@ -192,8 +192,13 @@ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg); /* br_netfilter.c */ +#ifdef CONFIG_BRIDGE_NETFILTER extern int br_netfilter_init(void); extern void br_netfilter_fini(void); +#else +#define br_netfilter_init() (0) +#define br_netfilter_fini() do { } while(0) +#endif /* br_stp.c */ extern void br_log_state(const struct net_bridge_port *p); @@ -232,6 +237,11 @@ extern struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); +/* br_netlink.c */ +extern void br_netlink_init(void); +extern void br_netlink_fini(void); +extern void br_ifinfo_notify(int event, struct net_bridge_port *port); + #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ extern struct sysfs_ops brport_sysfs_ops; diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 23dea1422c9a..14cd025079af 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -16,6 +16,7 @@ #include <linux/kernel.h> #include <linux/smp_lock.h> #include <linux/etherdevice.h> +#include <linux/rtnetlink.h> #include "br_private.h" #include "br_private_stp.h" @@ -86,6 +87,7 @@ void br_stp_disable_bridge(struct net_bridge *br) void br_stp_enable_port(struct net_bridge_port *p) { br_init_port(p); + br_ifinfo_notify(RTM_NEWLINK, p); br_port_state_selection(p->br); } @@ -99,6 +101,8 @@ void br_stp_disable_port(struct net_bridge_port *p) printk(KERN_INFO "%s: port %i(%s) entering %s state\n", br->dev->name, p->port_no, p->dev->name, "disabled"); + br_ifinfo_notify(RTM_DELLINK, p); + wasroot = br_is_root_bridge(br); br_become_designated_port(p); p->state = BR_STATE_DISABLED; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index ee5a51761260..02693a230dc1 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -29,7 +29,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/spinlock.h> #include <linux/socket.h> #include <linux/skbuff.h> diff --git a/net/core/Makefile b/net/core/Makefile index 79fe12cced27..e9bd2467d5a9 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_NET_DIVERT) += dv.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_WIRELESS_EXT) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o +obj-$(CONFIG_NET_DMA) += user_dma.o diff --git a/net/core/dev.c b/net/core/dev.c index 4fba549caf29..4d2b5167d7f5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -76,7 +76,6 @@ #include <asm/system.h> #include <linux/bitops.h> #include <linux/capability.h> -#include <linux/config.h> #include <linux/cpu.h> #include <linux/types.h> #include <linux/kernel.h> @@ -115,6 +114,8 @@ #include <net/iw_handler.h> #include <asm/current.h> #include <linux/audit.h> +#include <linux/dmaengine.h> +#include <linux/err.h> /* * The list of packet types we will receive (as opposed to discard) @@ -148,6 +149,12 @@ static DEFINE_SPINLOCK(ptype_lock); static struct list_head ptype_base[16]; /* 16 way hashed list */ static struct list_head ptype_all; /* Taps */ +#ifdef CONFIG_NET_DMA +static struct dma_client *net_dma_client; +static unsigned int net_dma_count; +static spinlock_t net_dma_event_lock; +#endif + /* * The @dev_base list is protected by @dev_base_lock and the rtnl * semaphore. @@ -222,7 +229,7 @@ extern void netdev_unregister_sysfs(struct net_device *); * For efficiency */ -int netdev_nit; +static int netdev_nit; /* * Add a protocol ID to the list. Now that the input handler is @@ -1041,7 +1048,7 @@ static inline void net_timestamp(struct sk_buff *skb) * taps currently in use. */ -void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) +static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; @@ -1155,9 +1162,17 @@ int skb_checksum_help(struct sk_buff *skb, int inward) unsigned int csum; int ret = 0, offset = skb->h.raw - skb->data; - if (inward) { - skb->ip_summed = CHECKSUM_NONE; - goto out; + if (inward) + goto out_set_summed; + + if (unlikely(skb_shinfo(skb)->gso_size)) { + static int warned; + + WARN_ON(!warned); + warned = 1; + + /* Let GSO fix up the checksum. */ + goto out_set_summed; } if (skb_cloned(skb)) { @@ -1174,11 +1189,70 @@ int skb_checksum_help(struct sk_buff *skb, int inward) BUG_ON(skb->csum + 2 > offset); *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); + +out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: return ret; } +/** + * skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + */ +struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_type *ptype; + int type = skb->protocol; + int err; + + BUG_ON(skb_shinfo(skb)->frag_list); + + skb->mac.raw = skb->data; + skb->mac_len = skb->nh.raw - skb->data; + __skb_pull(skb, skb->mac_len); + + if (unlikely(skb->ip_summed != CHECKSUM_HW)) { + static int warned; + + WARN_ON(!warned); + warned = 1; + + if (skb_header_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return ERR_PTR(err); + } + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { + if (ptype->type == type && !ptype->dev && ptype->gso_segment) { + if (unlikely(skb->ip_summed != CHECKSUM_HW)) { + err = ptype->gso_send_check(skb); + segs = ERR_PTR(err); + if (err || skb_gso_ok(skb, features)) + break; + __skb_push(skb, skb->data - skb->nh.raw); + } + segs = ptype->gso_segment(skb, features); + break; + } + } + rcu_read_unlock(); + + __skb_push(skb, skb->data - skb->mac.raw); + + return segs; +} + +EXPORT_SYMBOL(skb_gso_segment); + /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev) @@ -1192,7 +1266,6 @@ void netdev_rx_csum_fault(struct net_device *dev) EXPORT_SYMBOL(netdev_rx_csum_fault); #endif -#ifdef CONFIG_HIGHMEM /* Actually, we should eliminate this check as soon as we know, that: * 1. IOMMU is present and allows to map all the memory. * 2. No high memory really exists on this machine. @@ -1200,6 +1273,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { +#ifdef CONFIG_HIGHMEM int i; if (dev->features & NETIF_F_HIGHDMA) @@ -1209,81 +1283,112 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) if (PageHighMem(skb_shinfo(skb)->frags[i].page)) return 1; +#endif return 0; } -#else -#define illegal_highdma(dev, skb) (0) -#endif -/* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask) +struct dev_gso_cb { + void (*destructor)(struct sk_buff *skb); +}; + +#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) + +static void dev_gso_skb_destructor(struct sk_buff *skb) { - unsigned int size; - u8 *data; - long offset; - struct skb_shared_info *ninfo; - int headerlen = skb->data - skb->head; - int expand = (skb->tail + skb->data_len) - skb->end; + struct dev_gso_cb *cb; - if (skb_shared(skb)) - BUG(); + do { + struct sk_buff *nskb = skb->next; - if (expand <= 0) - expand = 0; + skb->next = nskb->next; + nskb->next = NULL; + kfree_skb(nskb); + } while (skb->next); - size = skb->end - skb->head + expand; - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (!data) - return -ENOMEM; + cb = DEV_GSO_CB(skb); + if (cb->destructor) + cb->destructor(skb); +} - /* Copy entire thing */ - if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len)) - BUG(); +/** + * dev_gso_segment - Perform emulated hardware segmentation on skb. + * @skb: buffer to segment + * + * This function segments the given skb and stores the list of segments + * in skb->next. + */ +static int dev_gso_segment(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct sk_buff *segs; + int features = dev->features & ~(illegal_highdma(dev, skb) ? + NETIF_F_SG : 0); - /* Set up shinfo */ - ninfo = (struct skb_shared_info*)(data + size); - atomic_set(&ninfo->dataref, 1); - ninfo->tso_size = skb_shinfo(skb)->tso_size; - ninfo->tso_segs = skb_shinfo(skb)->tso_segs; - ninfo->nr_frags = 0; - ninfo->frag_list = NULL; + segs = skb_gso_segment(skb, features); - /* Offset between the two in bytes */ - offset = data - skb->head; + /* Verifying header integrity only. */ + if (!segs) + return 0; - /* Free old data. */ - skb_release_data(skb); + if (unlikely(IS_ERR(segs))) + return PTR_ERR(segs); - skb->head = data; - skb->end = data + size; + skb->next = segs; + DEV_GSO_CB(skb)->destructor = skb->destructor; + skb->destructor = dev_gso_skb_destructor; - /* Set up new pointers */ - skb->h.raw += offset; - skb->nh.raw += offset; - skb->mac.raw += offset; - skb->tail += offset; - skb->data += offset; + return 0; +} - /* We are no longer a clone, even if we were. */ - skb->cloned = 0; +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + if (likely(!skb->next)) { + if (netdev_nit) + dev_queue_xmit_nit(skb, dev); - skb->tail += skb->data_len; - skb->data_len = 0; + if (netif_needs_gso(dev, skb)) { + if (unlikely(dev_gso_segment(skb))) + goto out_kfree_skb; + if (skb->next) + goto gso; + } + + return dev->hard_start_xmit(skb, dev); + } + +gso: + do { + struct sk_buff *nskb = skb->next; + int rc; + + skb->next = nskb->next; + nskb->next = NULL; + rc = dev->hard_start_xmit(nskb, dev); + if (unlikely(rc)) { + nskb->next = skb->next; + skb->next = nskb; + return rc; + } + if (unlikely(netif_queue_stopped(dev) && skb->next)) + return NETDEV_TX_BUSY; + } while (skb->next); + + skb->destructor = DEV_GSO_CB(skb)->destructor; + +out_kfree_skb: + kfree_skb(skb); return 0; } #define HARD_TX_LOCK(dev, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - spin_lock(&dev->xmit_lock); \ - dev->xmit_lock_owner = cpu; \ + netif_tx_lock(dev); \ } \ } #define HARD_TX_UNLOCK(dev) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - dev->xmit_lock_owner = -1; \ - spin_unlock(&dev->xmit_lock); \ + netif_tx_unlock(dev); \ } \ } @@ -1319,9 +1424,13 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; + /* GSO will handle the following emulations directly. */ + if (netif_needs_gso(dev, skb)) + goto gso; + if (skb_shinfo(skb)->frag_list && !(dev->features & NETIF_F_FRAGLIST) && - __skb_linearize(skb, GFP_ATOMIC)) + __skb_linearize(skb)) goto out_kfree_skb; /* Fragmented skb is linearized if device does not support SG, @@ -1330,25 +1439,26 @@ int dev_queue_xmit(struct sk_buff *skb) */ if (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && - __skb_linearize(skb, GFP_ATOMIC)) + __skb_linearize(skb)) goto out_kfree_skb; /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ if (skb->ip_summed == CHECKSUM_HW && - (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) && + (!(dev->features & NETIF_F_GEN_CSUM) && (!(dev->features & NETIF_F_IP_CSUM) || skb->protocol != htons(ETH_P_IP)))) if (skb_checksum_help(skb, 0)) goto out_kfree_skb; +gso: spin_lock_prefetch(&dev->queue_lock); /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ - local_bh_disable(); + rcu_read_lock_bh(); /* Updates of qdisc are serialized by queue_lock. * The struct Qdisc which is pointed to by qdisc is now a @@ -1382,8 +1492,8 @@ int dev_queue_xmit(struct sk_buff *skb) /* The device has no queue. Common case for software devices: loopback, all the sorts of tunnels... - Really, it is unlikely that xmit_lock protection is necessary here. - (f.e. loopback and IP tunnels are clean ignoring statistics + Really, it is unlikely that netif_tx_lock protection is necessary + here. (f.e. loopback and IP tunnels are clean ignoring statistics counters.) However, it is possible, that they rely on protection made by us here. @@ -1399,11 +1509,8 @@ int dev_queue_xmit(struct sk_buff *skb) HARD_TX_LOCK(dev, cpu); if (!netif_queue_stopped(dev)) { - if (netdev_nit) - dev_queue_xmit_nit(skb, dev); - rc = 0; - if (!dev->hard_start_xmit(skb, dev)) { + if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); goto out; } @@ -1422,13 +1529,13 @@ int dev_queue_xmit(struct sk_buff *skb) } rc = -ENETDOWN; - local_bh_enable(); + rcu_read_unlock_bh(); out_kfree_skb: kfree_skb(skb); return rc; out: - local_bh_enable(); + rcu_read_unlock_bh(); return rc; } @@ -1648,7 +1755,7 @@ static int ing_filter(struct sk_buff *skb) if (dev->qdisc_ingress) { __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); if (MAX_RED_LOOP < ttl++) { - printk("Redir loop detected Dropping packet (%s->%s)\n", + printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n", skb->input_dev->name, skb->dev->name); return TC_ACT_SHOT; } @@ -1846,6 +1953,19 @@ static void net_rx_action(struct softirq_action *h) } } out: +#ifdef CONFIG_NET_DMA + /* + * There may not be any more sk_buffs coming right now, so push + * any pending DMA copies to hardware + */ + if (net_dma_client) { + struct dma_chan *chan; + rcu_read_lock(); + list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) + dma_async_memcpy_issue_pending(chan); + rcu_read_unlock(); + } +#endif local_irq_enable(); return; @@ -2785,7 +2905,7 @@ int register_netdevice(struct net_device *dev) BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->xmit_lock); + spin_lock_init(&dev->_xmit_lock); dev->xmit_lock_owner = -1; #ifdef CONFIG_NET_CLS_ACT spin_lock_init(&dev->ingress_lock); @@ -2829,10 +2949,8 @@ int register_netdevice(struct net_device *dev) /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && - !(dev->features & (NETIF_F_IP_CSUM | - NETIF_F_NO_CSUM | - NETIF_F_HW_CSUM))) { - printk("%s: Dropping NETIF_F_SG since no checksum feature.\n", + !(dev->features & NETIF_F_ALL_CSUM)) { + printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", dev->name); dev->features &= ~NETIF_F_SG; } @@ -2840,7 +2958,7 @@ int register_netdevice(struct net_device *dev) /* TSO requires that SG is present as well. */ if ((dev->features & NETIF_F_TSO) && !(dev->features & NETIF_F_SG)) { - printk("%s: Dropping NETIF_F_TSO since no SG feature.\n", + printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", dev->name); dev->features &= ~NETIF_F_TSO; } @@ -3022,7 +3140,7 @@ static void netdev_wait_allrefs(struct net_device *dev) static DEFINE_MUTEX(net_todo_run_mutex); void netdev_run_todo(void) { - struct list_head list = LIST_HEAD_INIT(list); + struct list_head list; /* Need to guard against multiple cpu's getting out of order. */ mutex_lock(&net_todo_run_mutex); @@ -3037,9 +3155,9 @@ void netdev_run_todo(void) /* Snapshot list, allow later requests */ spin_lock(&net_todo_list_lock); - list_splice_init(&net_todo_list, &list); + list_replace_init(&net_todo_list, &list); spin_unlock(&net_todo_list_lock); - + while (!list_empty(&list)) { struct net_device *dev = list_entry(list.next, struct net_device, todo_list); @@ -3300,6 +3418,88 @@ static int dev_cpu_callback(struct notifier_block *nfb, } #endif /* CONFIG_HOTPLUG_CPU */ +#ifdef CONFIG_NET_DMA +/** + * net_dma_rebalance - + * This is called when the number of channels allocated to the net_dma_client + * changes. The net_dma_client tries to have one DMA channel per CPU. + */ +static void net_dma_rebalance(void) +{ + unsigned int cpu, i, n; + struct dma_chan *chan; + + lock_cpu_hotplug(); + + if (net_dma_count == 0) { + for_each_online_cpu(cpu) + rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL); + unlock_cpu_hotplug(); + return; + } + + i = 0; + cpu = first_cpu(cpu_online_map); + + rcu_read_lock(); + list_for_each_entry(chan, &net_dma_client->channels, client_node) { + n = ((num_online_cpus() / net_dma_count) + + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); + + while(n) { + per_cpu(softnet_data.net_dma, cpu) = chan; + cpu = next_cpu(cpu, cpu_online_map); + n--; + } + i++; + } + rcu_read_unlock(); + + unlock_cpu_hotplug(); +} + +/** + * netdev_dma_event - event callback for the net_dma_client + * @client: should always be net_dma_client + * @chan: DMA channel for the event + * @event: event type + */ +static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_event event) +{ + spin_lock(&net_dma_event_lock); + switch (event) { + case DMA_RESOURCE_ADDED: + net_dma_count++; + net_dma_rebalance(); + break; + case DMA_RESOURCE_REMOVED: + net_dma_count--; + net_dma_rebalance(); + break; + default: + break; + } + spin_unlock(&net_dma_event_lock); +} + +/** + * netdev_dma_regiser - register the networking subsystem as a DMA client + */ +static int __init netdev_dma_register(void) +{ + spin_lock_init(&net_dma_event_lock); + net_dma_client = dma_async_client_register(netdev_dma_event); + if (net_dma_client == NULL) + return -ENOMEM; + + dma_async_client_chan_request(net_dma_client, num_online_cpus()); + return 0; +} + +#else +static int __init netdev_dma_register(void) { return -ENODEV; } +#endif /* CONFIG_NET_DMA */ /* * Initialize the DEV module. At boot time this walks the device list and @@ -3353,6 +3553,8 @@ static int __init net_dev_init(void) atomic_set(&queue->backlog_dev.refcnt, 1); } + netdev_dma_register(); + dev_boot_phase = 0; open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); @@ -3371,7 +3573,6 @@ subsys_initcall(net_dev_init); EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(__dev_get_by_name); EXPORT_SYMBOL(__dev_remove_pack); -EXPORT_SYMBOL(__skb_linearize); EXPORT_SYMBOL(dev_valid_name); EXPORT_SYMBOL(dev_add_pack); EXPORT_SYMBOL(dev_alloc_name); diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 05d60850840e..c57d887da2ef 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -62,7 +62,7 @@ * Device mc lists are changed by bh at least if IPv6 is enabled, * so that it must be bh protected. * - * We block accesses to device mc filters with dev->xmit_lock. + * We block accesses to device mc filters with netif_tx_lock. */ /* @@ -93,9 +93,9 @@ static void __dev_mc_upload(struct net_device *dev) void dev_mc_upload(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + netif_tx_lock_bh(dev); __dev_mc_upload(dev); - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); } /* @@ -107,7 +107,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) int err = 0; struct dev_mc_list *dmi, **dmip; - spin_lock_bh(&dev->xmit_lock); + netif_tx_lock_bh(dev); for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { /* @@ -139,13 +139,13 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) */ __dev_mc_upload(dev); - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); return 0; } } err = -ENOENT; done: - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); return err; } @@ -160,7 +160,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC); - spin_lock_bh(&dev->xmit_lock); + netif_tx_lock_bh(dev); for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && dmi->dmi_addrlen == alen) { @@ -176,7 +176,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) } if ((dmi = dmi1) == NULL) { - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); return -ENOMEM; } memcpy(dmi->dmi_addr, addr, alen); @@ -189,11 +189,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) __dev_mc_upload(dev); - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); return 0; done: - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); kfree(dmi1); return err; } @@ -204,7 +204,7 @@ done: void dev_mc_discard(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + netif_tx_lock_bh(dev); while (dev->mc_list != NULL) { struct dev_mc_list *tmp = dev->mc_list; @@ -215,7 +215,7 @@ void dev_mc_discard(struct net_device *dev) } dev->mc_count = 0; - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); } #ifdef CONFIG_PROC_FS @@ -250,7 +250,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) struct dev_mc_list *m; struct net_device *dev = v; - spin_lock_bh(&dev->xmit_lock); + netif_tx_lock_bh(dev); for (m = dev->mc_list; m; m = m->next) { int i; @@ -262,7 +262,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) seq_putc(seq, '\n'); } - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); return 0; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index e6f76106a99b..27ce1683caf5 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -30,7 +30,7 @@ u32 ethtool_op_get_link(struct net_device *dev) u32 ethtool_op_get_tx_csum(struct net_device *dev) { - return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0; + return (dev->features & NETIF_F_ALL_CSUM) != 0; } int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) @@ -551,9 +551,7 @@ static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) return -EFAULT; if (edata.data && - !(dev->features & (NETIF_F_IP_CSUM | - NETIF_F_NO_CSUM | - NETIF_F_HW_CSUM))) + !(dev->features & NETIF_F_ALL_CSUM)) return -EINVAL; return __ethtool_set_sg(dev, edata.data); @@ -591,7 +589,7 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr) { - struct ethtool_value edata = { ETHTOOL_GTSO }; + struct ethtool_value edata = { ETHTOOL_GUFO }; if (!dev->ethtool_ops->get_ufo) return -EOPNOTSUPP; @@ -600,6 +598,7 @@ static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr) return -EFAULT; return 0; } + static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -615,6 +614,29 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_ufo(dev, edata.data); } +static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GGSO }; + + edata.data = dev->features & NETIF_F_GSO; + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + if (edata.data) + dev->features |= NETIF_F_GSO; + else + dev->features &= ~NETIF_F_GSO; + return 0; +} + static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; @@ -906,6 +928,12 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_SUFO: rc = ethtool_set_ufo(dev, useraddr); break; + case ETHTOOL_GGSO: + rc = ethtool_get_gso(dev, useraddr); + break; + case ETHTOOL_SGSO: + rc = ethtool_set_gso(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 646937cc2d84..4b36114744c5 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -11,7 +11,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/if.h> @@ -91,11 +90,10 @@ static void rfc2863_policy(struct net_device *dev) /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) { - LIST_HEAD(head); - struct list_head *n, *next; + struct list_head head, *n, *next; spin_lock_irq(&lweventlist_lock); - list_splice_init(&lweventlist, &head); + list_replace_init(&lweventlist, &head); spin_unlock_irq(&lweventlist_lock); list_for_each_safe(n, next, &head) { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 50a8c73caf97..7ad681f5e712 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -15,7 +15,6 @@ * Harald Welte Add neighbour cache statistics like rtstat */ -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/module.h> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 47a6fceb6771..13472762b18b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -10,7 +10,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/if_arp.h> diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e8e05cebd95a..471da451cd48 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -54,6 +54,7 @@ static atomic_t trapped; sizeof(struct iphdr) + sizeof(struct ethhdr)) static void zap_completion_queue(void); +static void arp_reply(struct sk_buff *skb); static void queue_process(void *p) { @@ -153,6 +154,22 @@ static void poll_napi(struct netpoll *np) } } +static void service_arp_queue(struct netpoll_info *npi) +{ + struct sk_buff *skb; + + if (unlikely(!npi)) + return; + + skb = skb_dequeue(&npi->arp_tx); + + while (skb != NULL) { + arp_reply(skb); + skb = skb_dequeue(&npi->arp_tx); + } + return; +} + void netpoll_poll(struct netpoll *np) { if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) @@ -163,6 +180,8 @@ void netpoll_poll(struct netpoll *np) if (np->dev->poll) poll_napi(np); + service_arp_queue(np->dev->npinfo); + zap_completion_queue(); } @@ -273,24 +292,17 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) do { npinfo->tries--; - spin_lock(&np->dev->xmit_lock); - np->dev->xmit_lock_owner = smp_processor_id(); + netif_tx_lock(np->dev); /* * network drivers do not expect to be called if the queue is * stopped. */ - if (netif_queue_stopped(np->dev)) { - np->dev->xmit_lock_owner = -1; - spin_unlock(&np->dev->xmit_lock); - netpoll_poll(np); - udelay(50); - continue; - } + status = NETDEV_TX_BUSY; + if (!netif_queue_stopped(np->dev)) + status = np->dev->hard_start_xmit(skb, np->dev); - status = np->dev->hard_start_xmit(skb, np->dev); - np->dev->xmit_lock_owner = -1; - spin_unlock(&np->dev->xmit_lock); + netif_tx_unlock(np->dev); /* success */ if(!status) { @@ -449,7 +461,9 @@ int __netpoll_rx(struct sk_buff *skb) int proto, len, ulen; struct iphdr *iph; struct udphdr *uh; - struct netpoll *np = skb->dev->npinfo->rx_np; + struct netpoll_info *npi = skb->dev->npinfo; + struct netpoll *np = npi->rx_np; + if (!np) goto out; @@ -459,7 +473,7 @@ int __netpoll_rx(struct sk_buff *skb) /* check if netpoll clients need ARP */ if (skb->protocol == __constant_htons(ETH_P_ARP) && atomic_read(&trapped)) { - arp_reply(skb); + skb_queue_tail(&npi->arp_tx, skb); return 1; } @@ -654,6 +668,7 @@ int netpoll_setup(struct netpoll *np) npinfo->poll_owner = -1; npinfo->tries = MAX_RETRIES; spin_lock_init(&npinfo->rx_lock); + skb_queue_head_init(&npinfo->arp_tx); } else npinfo = ndev->npinfo; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index c23e9c06ee23..67ed14ddabd2 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2897,7 +2897,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - spin_lock_bh(&odev->xmit_lock); + netif_tx_lock_bh(odev); if (!netif_queue_stopped(odev)) { atomic_inc(&(pkt_dev->skb->users)); @@ -2942,7 +2942,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->next_tx_ns = 0; } - spin_unlock_bh(&odev->xmit_lock); + netif_tx_unlock_bh(odev); /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3fcfa9c59e1f..20e5bb73f147 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -16,7 +16,6 @@ * Vitaly E. Lavrov RTA_OK arithmetics was wrong. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/types.h> @@ -663,7 +662,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) sz_idx = type>>2; kind = type&3; - if (kind != 2 && security_netlink_recv(skb)) { + if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) { *errp = -EPERM; return -1; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fb3770f9c094..476aa3978504 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -38,7 +38,6 @@ * The functions in this file will not compile correctly with gcc 2.4.x */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -72,6 +71,13 @@ static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; /* + * lockdep: lock class key used by skb_queue_head_init(): + */ +struct lock_class_key skb_queue_lock_key; + +EXPORT_SYMBOL(skb_queue_lock_key); + +/* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always * reliable. @@ -172,9 +178,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo = skb_shinfo(skb); atomic_set(&shinfo->dataref, 1); shinfo->nr_frags = 0; - shinfo->tso_size = 0; - shinfo->tso_segs = 0; - shinfo->ufo_size = 0; + shinfo->gso_size = 0; + shinfo->gso_segs = 0; + shinfo->gso_type = 0; shinfo->ip6_frag_id = 0; shinfo->frag_list = NULL; @@ -238,8 +244,9 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->tso_size = 0; - skb_shinfo(skb)->tso_segs = 0; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_segs = 0; + skb_shinfo(skb)->gso_type = 0; skb_shinfo(skb)->frag_list = NULL; out: return skb; @@ -250,11 +257,11 @@ nodata: } -static void skb_drop_fraglist(struct sk_buff *skb) +static void skb_drop_list(struct sk_buff **listp) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; + struct sk_buff *list = *listp; - skb_shinfo(skb)->frag_list = NULL; + *listp = NULL; do { struct sk_buff *this = list; @@ -263,6 +270,11 @@ static void skb_drop_fraglist(struct sk_buff *skb) } while (list); } +static inline void skb_drop_fraglist(struct sk_buff *skb) +{ + skb_drop_list(&skb_shinfo(skb)->frag_list); +} + static void skb_clone_fraglist(struct sk_buff *skb) { struct sk_buff *list; @@ -271,7 +283,7 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } -void skb_release_data(struct sk_buff *skb) +static void skb_release_data(struct sk_buff *skb) { if (!skb->cloned || !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, @@ -464,7 +476,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->tc_verd = CLR_TC_MUNGED(n->tc_verd); C(input_dev); #endif - + skb_copy_secmark(n, skb); #endif C(truesize); atomic_set(&n->users, 1); @@ -526,9 +538,11 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif new->tc_index = old->tc_index; #endif + skb_copy_secmark(new, old); atomic_set(&new->users, 1); - skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; - skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; + skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; + skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; + skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } /** @@ -780,69 +794,116 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, * filled. Used by network drivers which may DMA or transfer data * beyond the buffer end onto the wire. * - * May return NULL in out of memory cases. + * May return error in out of memory cases. The skb is freed on error. */ -struct sk_buff *skb_pad(struct sk_buff *skb, int pad) +int skb_pad(struct sk_buff *skb, int pad) { - struct sk_buff *nskb; + int err; + int ntail; /* If the skbuff is non linear tailroom is always zero.. */ - if (skb_tailroom(skb) >= pad) { + if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) { memset(skb->data+skb->len, 0, pad); - return skb; + return 0; } - - nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); + + ntail = skb->data_len + pad - (skb->end - skb->tail); + if (likely(skb_cloned(skb) || ntail > 0)) { + err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC); + if (unlikely(err)) + goto free_skb; + } + + /* FIXME: The use of this function with non-linear skb's really needs + * to be audited. + */ + err = skb_linearize(skb); + if (unlikely(err)) + goto free_skb; + + memset(skb->data + skb->len, 0, pad); + return 0; + +free_skb: kfree_skb(skb); - if (nskb) - memset(nskb->data+nskb->len, 0, pad); - return nskb; + return err; } -/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. - * If realloc==0 and trimming is impossible without change of data, - * it is BUG(). +/* Trims skb to length len. It can change skb pointers. */ -int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) +int ___pskb_trim(struct sk_buff *skb, unsigned int len) { + struct sk_buff **fragp; + struct sk_buff *frag; int offset = skb_headlen(skb); int nfrags = skb_shinfo(skb)->nr_frags; int i; + int err; + + if (skb_cloned(skb) && + unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) + return err; for (i = 0; i < nfrags; i++) { int end = offset + skb_shinfo(skb)->frags[i].size; - if (end > len) { - if (skb_cloned(skb)) { - BUG_ON(!realloc); - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; - } - if (len <= offset) { - put_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->nr_frags--; - } else { - skb_shinfo(skb)->frags[i].size = len - offset; - } + + if (end < len) { + offset = end; + continue; } - offset = end; + + if (len > offset) + skb_shinfo(skb)->frags[i++].size = len - offset; + + skb_shinfo(skb)->nr_frags = i; + + for (; i < nfrags; i++) + put_page(skb_shinfo(skb)->frags[i].page); + + if (skb_shinfo(skb)->frag_list) + skb_drop_fraglist(skb); + break; } - if (offset < len) { + for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); + fragp = &frag->next) { + int end = offset + frag->len; + + if (skb_shared(frag)) { + struct sk_buff *nfrag; + + nfrag = skb_clone(frag, GFP_ATOMIC); + if (unlikely(!nfrag)) + return -ENOMEM; + + nfrag->next = frag->next; + frag = nfrag; + *fragp = frag; + } + + if (end < len) { + offset = end; + continue; + } + + if (end > len && + unlikely((err = pskb_trim(frag, len - offset)))) + return err; + + if (frag->next) + skb_drop_list(&frag->next); + break; + } + + if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; } else { - if (len <= skb_headlen(skb)) { - skb->len = len; - skb->data_len = 0; - skb->tail = skb->data + len; - if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) - skb_drop_fraglist(skb); - } else { - skb->data_len -= skb->len - len; - skb->len = len; - } + skb->len = len; + skb->data_len = 0; + skb->tail = skb->data + len; } return 0; @@ -1723,12 +1784,15 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state) { + unsigned int ret; + config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state)); - return textsearch_find(config, state); + ret = textsearch_find(config, state); + return (ret <= to - from ? ret : UINT_MAX); } /** @@ -1826,6 +1890,133 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) EXPORT_SYMBOL_GPL(skb_pull_rcsum); +/** + * skb_segment - Perform protocol segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * + * This function performs segmentation on the given skb. It returns + * the segment at the given position. It returns NULL if there are + * no more segments to generate, or when an error is encountered. + */ +struct sk_buff *skb_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = NULL; + struct sk_buff *tail = NULL; + unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int doffset = skb->data - skb->mac.raw; + unsigned int offset = doffset; + unsigned int headroom; + unsigned int len; + int sg = features & NETIF_F_SG; + int nfrags = skb_shinfo(skb)->nr_frags; + int err = -ENOMEM; + int i = 0; + int pos; + + __skb_push(skb, doffset); + headroom = skb_headroom(skb); + pos = skb_headlen(skb); + + do { + struct sk_buff *nskb; + skb_frag_t *frag; + int hsize, nsize; + int k; + int size; + + len = skb->len - offset; + if (len > mss) + len = mss; + + hsize = skb_headlen(skb) - offset; + if (hsize < 0) + hsize = 0; + nsize = hsize + doffset; + if (nsize > len + doffset || !sg) + nsize = len + doffset; + + nskb = alloc_skb(nsize + headroom, GFP_ATOMIC); + if (unlikely(!nskb)) + goto err; + + if (segs) + tail->next = nskb; + else + segs = nskb; + tail = nskb; + + nskb->dev = skb->dev; + nskb->priority = skb->priority; + nskb->protocol = skb->protocol; + nskb->dst = dst_clone(skb->dst); + memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); + nskb->pkt_type = skb->pkt_type; + nskb->mac_len = skb->mac_len; + + skb_reserve(nskb, headroom); + nskb->mac.raw = nskb->data; + nskb->nh.raw = nskb->data + skb->mac_len; + nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw); + memcpy(skb_put(nskb, doffset), skb->data, doffset); + + if (!sg) { + nskb->csum = skb_copy_and_csum_bits(skb, offset, + skb_put(nskb, len), + len, 0); + continue; + } + + frag = skb_shinfo(nskb)->frags; + k = 0; + + nskb->ip_summed = CHECKSUM_HW; + nskb->csum = skb->csum; + memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); + + while (pos < offset + len) { + BUG_ON(i >= nfrags); + + *frag = skb_shinfo(skb)->frags[i]; + get_page(frag->page); + size = frag->size; + + if (pos < offset) { + frag->page_offset += offset - pos; + frag->size -= offset - pos; + } + + k++; + + if (pos + size <= offset + len) { + i++; + pos += size; + } else { + frag->size -= pos + size - (offset + len); + break; + } + + frag++; + } + + skb_shinfo(nskb)->nr_frags = k; + nskb->data_len = len - hsize; + nskb->len += nskb->data_len; + nskb->truesize += nskb->data_len; + } while ((offset += len) < skb->len); + + return segs; + +err: + while ((skb = segs)) { + segs = skb->next; + kfree(skb); + } + return ERR_PTR(err); +} + +EXPORT_SYMBOL_GPL(skb_segment); + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", diff --git a/net/core/sock.c b/net/core/sock.c index ed2afdb9ea2d..51fcfbc041a7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -92,7 +92,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -130,6 +129,53 @@ #include <net/tcp.h> #endif +/* + * Each address family might have different locking rules, so we have + * one slock key per address family: + */ +static struct lock_class_key af_family_keys[AF_MAX]; +static struct lock_class_key af_family_slock_keys[AF_MAX]; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* + * Make lock validator output more readable. (we pre-construct these + * strings build-time, so that runtime initialization of socket + * locks is fast): + */ +static const char *af_family_key_strings[AF_MAX+1] = { + "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , + "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", + "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , + "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" , + "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , + "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , + "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , + "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , + "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , + "sk_lock-27" , "sk_lock-28" , "sk_lock-29" , + "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX" +}; +static const char *af_family_slock_key_strings[AF_MAX+1] = { + "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , + "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", + "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , + "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" , + "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , + "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , + "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , + "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , + "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , + "slock-27" , "slock-28" , "slock-29" , + "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX" +}; +#endif + +/* + * sk_callback_lock locking rules are per-address-family, + * so split the lock classes by using a per-AF key: + */ +static struct lock_class_key af_callback_keys[AF_MAX]; + /* Take into consideration the size of the struct sk_buff overhead in the * determination of these values, since that is non-constant across * platforms. This makes socket queueing behavior and performance @@ -238,9 +284,16 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) skb->dev = NULL; bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) + if (!sock_owned_by_user(sk)) { + /* + * trylock + unlock semantics: + */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); + rc = sk->sk_backlog_rcv(sk, skb); - else + + mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); + } else sk_add_backlog(sk, skb); bh_unlock_sock(sk); out: @@ -565,6 +618,13 @@ set_rcvbuf: ret = -ENONET; break; + case SO_PASSSEC: + if (valbool) + set_bit(SOCK_PASSSEC, &sock->flags); + else + clear_bit(SOCK_PASSSEC, &sock->flags); + break; + /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ default: @@ -723,6 +783,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_state == TCP_LISTEN; break; + case SO_PASSSEC: + v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0; + break; + case SO_PEERSEC: return security_socket_getpeersec_stream(sock, optval, optlen, len); @@ -739,6 +803,33 @@ lenout: return 0; } +/* + * Initialize an sk_lock. + * + * (We also register the sk_lock with the lock validator.) + */ +static void inline sock_lock_init(struct sock *sk) +{ + spin_lock_init(&sk->sk_lock.slock); + sk->sk_lock.owner = NULL; + init_waitqueue_head(&sk->sk_lock.wq); + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock)); + + /* + * Mark both the sk_lock and the sk_lock.slock as a + * per-address-family lock class: + */ + lockdep_set_class_and_name(&sk->sk_lock.slock, + af_family_slock_keys + sk->sk_family, + af_family_slock_key_strings[sk->sk_family]); + lockdep_init_map(&sk->sk_lock.dep_map, + af_family_key_strings[sk->sk_family], + af_family_keys + sk->sk_family); +} + /** * sk_alloc - All socket objects are allocated here * @family: protocol family @@ -832,9 +923,14 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_omem_alloc, 0); skb_queue_head_init(&newsk->sk_receive_queue); skb_queue_head_init(&newsk->sk_write_queue); +#ifdef CONFIG_NET_DMA + skb_queue_head_init(&newsk->sk_async_wait_queue); +#endif rwlock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); + lockdep_set_class(&newsk->sk_callback_lock, + af_callback_keys + newsk->sk_family); newsk->sk_dst_cache = NULL; newsk->sk_wmem_queued = 0; @@ -1383,6 +1479,9 @@ void sock_init_data(struct socket *sock, struct sock *sk) skb_queue_head_init(&sk->sk_receive_queue); skb_queue_head_init(&sk->sk_write_queue); skb_queue_head_init(&sk->sk_error_queue); +#ifdef CONFIG_NET_DMA + skb_queue_head_init(&sk->sk_async_wait_queue); +#endif sk->sk_send_head = NULL; @@ -1406,6 +1505,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) rwlock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); + lockdep_set_class(&sk->sk_callback_lock, + af_callback_keys + sk->sk_family); sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; @@ -1433,24 +1534,34 @@ void sock_init_data(struct socket *sock, struct sock *sk) void fastcall lock_sock(struct sock *sk) { might_sleep(); - spin_lock_bh(&(sk->sk_lock.slock)); + spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_lock.owner) __lock_sock(sk); sk->sk_lock.owner = (void *)1; - spin_unlock_bh(&(sk->sk_lock.slock)); + spin_unlock(&sk->sk_lock.slock); + /* + * The sk_lock has mutex_lock() semantics here: + */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + local_bh_enable(); } EXPORT_SYMBOL(lock_sock); void fastcall release_sock(struct sock *sk) { - spin_lock_bh(&(sk->sk_lock.slock)); + /* + * The sk_lock has mutex_unlock() semantics: + */ + mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); + + spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); sk->sk_lock.owner = NULL; - if (waitqueue_active(&(sk->sk_lock.wq))) - wake_up(&(sk->sk_lock.wq)); - spin_unlock_bh(&(sk->sk_lock.slock)); + if (waitqueue_active(&sk->sk_lock.wq)) + wake_up(&sk->sk_lock.wq); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); diff --git a/net/core/stream.c b/net/core/stream.c index e9489696f694..d1d7decf70b0 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -196,15 +196,13 @@ EXPORT_SYMBOL(sk_stream_error); void __sk_stream_mem_reclaim(struct sock *sk) { - if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM) { - atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM, - sk->sk_prot->memory_allocated); - sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1; - if (*sk->sk_prot->memory_pressure && - (atomic_read(sk->sk_prot->memory_allocated) < - sk->sk_prot->sysctl_mem[0])) - *sk->sk_prot->memory_pressure = 0; - } + atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM, + sk->sk_prot->memory_allocated); + sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1; + if (*sk->sk_prot->memory_pressure && + (atomic_read(sk->sk_prot->memory_allocated) < + sk->sk_prot->sysctl_mem[0])) + *sk->sk_prot->memory_pressure = 0; } EXPORT_SYMBOL(__sk_stream_mem_reclaim); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 710453656721..02534131d88e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -7,7 +7,6 @@ #include <linux/mm.h> #include <linux/sysctl.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/socket.h> #include <net/sock.h> diff --git a/net/core/user_dma.c b/net/core/user_dma.c new file mode 100644 index 000000000000..b7c98dbcdb81 --- /dev/null +++ b/net/core/user_dma.c @@ -0,0 +1,131 @@ +/* + * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * Portions based on net/core/datagram.c and copyrighted by their authors. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ + +/* + * This code allows the net stack to make use of a DMA engine for + * skb to iovec copies. + */ + +#include <linux/dmaengine.h> +#include <linux/socket.h> +#include <linux/rtnetlink.h> /* for BUG_TRAP */ +#include <net/tcp.h> + +#define NET_DMA_DEFAULT_COPYBREAK 4096 + +int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK; + +/** + * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec. + * @skb - buffer to copy + * @offset - offset in the buffer to start copying from + * @iovec - io vector to copy to + * @len - amount of data to copy from buffer to iovec + * @pinned_list - locked iovec buffer data + * + * Note: the iovec is modified during the copy. + */ +int dma_skb_copy_datagram_iovec(struct dma_chan *chan, + struct sk_buff *skb, int offset, struct iovec *to, + size_t len, struct dma_pinned_list *pinned_list) +{ + int start = skb_headlen(skb); + int i, copy = start - offset; + dma_cookie_t cookie = 0; + + /* Copy header. */ + if (copy > 0) { + if (copy > len) + copy = len; + cookie = dma_memcpy_to_iovec(chan, to, pinned_list, + skb->data + offset, copy); + if (cookie < 0) + goto fault; + len -= copy; + if (len == 0) + goto end; + offset += copy; + } + + /* Copy paged appendix. Hmm... why does this look so complicated? */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + skb_shinfo(skb)->frags[i].size; + copy = end - offset; + if ((copy = end - offset) > 0) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + struct page *page = frag->page; + + if (copy > len) + copy = len; + + cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page, + frag->page_offset + offset - start, copy); + if (cookie < 0) + goto fault; + len -= copy; + if (len == 0) + goto end; + offset += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + list->len; + copy = end - offset; + if (copy > 0) { + if (copy > len) + copy = len; + cookie = dma_skb_copy_datagram_iovec(chan, list, + offset - start, to, copy, + pinned_list); + if (cookie < 0) + goto fault; + len -= copy; + if (len == 0) + goto end; + offset += copy; + } + start = end; + } + } + +end: + if (!len) { + skb->dma_cookie = cookie; + return cookie; + } + +fault: + return -EFAULT; +} diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 7e096ba8454f..859e3359fcda 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -26,7 +26,7 @@ config INET_DCCP_DIAG config IP_DCCP_ACKVEC depends on IP_DCCP - def_bool N + bool source "net/dccp/ccids/Kconfig" diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index b5981e5f6b00..8c211c58893b 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -452,6 +452,7 @@ found: (unsigned long long) avr->dccpavr_ack_ackno); dccp_ackvec_throw_record(av, avr); + break; } /* * If it wasn't received, continue scanning... we might diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index ec7a89bb7b39..0adf4b56c34c 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -11,7 +11,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/compiler.h> #include <linux/list.h> #include <linux/time.h> diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index d4f9e2d33453..e9615627dcd6 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -30,7 +30,6 @@ * - jiffies wrapping */ -#include <linux/config.h> #include "../ccid.h" #include "../dccp.h" #include "ccid2.h" diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index b4a51d0355a5..c39bff706cfc 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -34,7 +34,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/config.h> #include "../ccid.h" #include "../dccp.h" #include "lib/packet_history.h" diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index f18b96d4e5a2..5ade4f668b22 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -36,7 +36,6 @@ #ifndef _DCCP_CCID3_H_ #define _DCCP_CCID3_H_ -#include <linux/config.h> #include <linux/list.h> #include <linux/time.h> #include <linux/types.h> diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 4c01a54143ad..5d7b7d864385 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -11,7 +11,6 @@ * (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include "loss_interval.h" diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 417d9d82df3e..43bf78269d1d 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -13,7 +13,6 @@ * any later version. */ -#include <linux/config.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/time.h> diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index d3f9d2053830..ad98d6a322eb 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -34,7 +34,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/string.h> diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 122e96737ff6..673c209e4e85 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -37,7 +37,6 @@ #ifndef _DCCP_PKT_HIST_ #define _DCCP_PKT_HIST_ -#include <linux/config.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/time.h> diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index add3cae65e2d..4fd2ebebf5a0 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -12,7 +12,6 @@ * (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <asm/div64.h> diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 1fe509148689..d00a2f4ee5dd 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -12,7 +12,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/dccp.h> #include <net/snmp.h> #include <net/sock.h> diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 0f25dc395967..0f3745585a94 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -9,7 +9,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/inet_diag.h> diff --git a/net/dccp/feat.c b/net/dccp/feat.c index b39e2a597889..a1b0682ee77c 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include "dccp.h" diff --git a/net/dccp/input.c b/net/dccp/input.c index bfc53665516b..7f9dc6ac58c9 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/skbuff.h> diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f2c011fd2ba1..c3073e7e81d3 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/icmp.h> #include <linux/module.h> diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 65e2ab0886e6..ff42bc43263d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/random.h> #include <linux/xfrm.h> diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h index e4d4e9309270..6eef81fdbe56 100644 --- a/net/dccp/ipv6.h +++ b/net/dccp/ipv6.h @@ -11,7 +11,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/ipv6.h> diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index c0349e5b0551..9045438d6b36 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/skbuff.h> #include <linux/timer.h> diff --git a/net/dccp/options.c b/net/dccp/options.c index e9feb2a0c770..c3cda1e39aa8 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -11,7 +11,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/dccp/output.c b/net/dccp/output.c index 7409e4a3abdf..58669beee132 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/kernel.h> #include <linux/skbuff.h> diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 2e0ee8355c41..6f14bb5a28d4 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -9,7 +9,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/module.h> #include <linux/types.h> @@ -485,7 +484,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, err = -EINVAL; else err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L, - (struct dccp_so_feat *) + (struct dccp_so_feat __user *) optval); break; @@ -494,7 +493,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, err = -EINVAL; else err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R, - (struct dccp_so_feat *) + (struct dccp_so_feat __user *) optval); break; @@ -719,7 +718,7 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); - sk_eat_skb(sk, skb); + sk_eat_skb(sk, skb, 0); verify_sock_status: if (sock_flag(sk, SOCK_DONE)) { len = 0; @@ -773,7 +772,7 @@ verify_sock_status: } found_fin_ok: if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb); + sk_eat_skb(sk, skb, 0); break; } while (1); out: diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 64c89e9c229e..c1ba9451bc3d 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -9,7 +9,6 @@ * as published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 5244415e5f18..8447742f5615 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/skbuff.h> diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2b289ef20ab3..5486247735f6 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -99,7 +99,6 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat dn_bind fixes *******************************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index a26ff9f44576..98a25208440d 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -24,7 +24,6 @@ * devices. All mtu based now. */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/moduleparam.h> diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index bd4ce8681a12..0375077391b7 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -17,7 +17,6 @@ * this code was copied from it. * */ -#include <linux/config.h> #include <linux/string.h> #include <linux/net.h> #include <linux/socket.h> diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 66e230c3b328..5ce9c9e0565c 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -24,7 +24,6 @@ * */ -#include <linux/config.h> #include <linux/net.h> #include <linux/module.h> #include <linux/socket.h> diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 547523b41c81..86f7f3b28e70 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -45,7 +45,6 @@ GNU General Public License for more details. *******************************************************************************/ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -801,8 +800,7 @@ got_it: * We linearize everything except data segments here. */ if (cb->nsp_flags & ~0x60) { - if (unlikely(skb_is_nonlinear(skb)) && - skb_linearize(skb, GFP_ATOMIC) != 0) + if (unlikely(skb_linearize(skb))) goto free_out; } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index e172cf98d7fc..1355614ec11b 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -55,7 +55,6 @@ GNU General Public License for more details. *******************************************************************************/ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -629,8 +628,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type padlen); if (flags & DN_RT_PKT_CNTL) { - if (unlikely(skb_is_nonlinear(skb)) && - skb_linearize(skb, GFP_ATOMIC) != 0) + if (unlikely(skb_linearize(skb))) goto dump_it; switch(flags & DN_RT_CNTL_MSK) { diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 446faafe2065..22f321d9bf9d 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -13,7 +13,6 @@ * Changes: * */ -#include <linux/config.h> #include <linux/string.h> #include <linux/net.h> #include <linux/socket.h> @@ -400,9 +399,10 @@ int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { if (idx < s_idx) - continue; + goto next; if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0) break; +next: idx++; } rcu_read_unlock(); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 0ebc46af1bdd..37d9d0a1ac8c 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -12,7 +12,6 @@ * Changes: * */ -#include <linux/config.h> #include <linux/string.h> #include <linux/net.h> #include <linux/socket.h> diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 74133ecd7700..8b99bd33540d 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -107,7 +107,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) return; - if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) + if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); /* Eventually we might send routing messages too */ diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index bda5920215fd..e246f054f368 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -13,7 +13,6 @@ * Steve Whitehouse - Memory buffer settings, like the tcp ones * */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> #include <linux/fs.h> diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 868265619dbb..309ae4c6549a 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -9,7 +9,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/ethernet/Makefile b/net/ethernet/Makefile index 69b74a9a0fc3..7cef1d8ace27 100644 --- a/net/ethernet/Makefile +++ b/net/ethernet/Makefile @@ -3,6 +3,5 @@ # obj-y += eth.o -obj-$(CONFIG_SYSCTL) += sysctl_net_ether.o obj-$(subst m,y,$(CONFIG_IPX)) += pe2.o obj-$(subst m,y,$(CONFIG_ATALK)) += pe2.o diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index c971f14712ec..387c71c584ee 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -51,7 +51,6 @@ #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> -#include <linux/config.h> #include <linux/init.h> #include <linux/if_ether.h> #include <net/dst.h> diff --git a/net/ethernet/sysctl_net_ether.c b/net/ethernet/sysctl_net_ether.c deleted file mode 100644 index 66b39fc342d2..000000000000 --- a/net/ethernet/sysctl_net_ether.c +++ /dev/null @@ -1,14 +0,0 @@ -/* -*- linux-c -*- - * sysctl_net_ether.c: sysctl interface to net Ethernet subsystem. - * - * Begun April 1, 1996, Mike Shaver. - * Added /proc/sys/net/ether directory entry (empty =) ). [MS] - */ - -#include <linux/mm.h> -#include <linux/sysctl.h> -#include <linux/if_ether.h> - -ctl_table ether_table[] = { - {0} -}; diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index 78b2d13e80e3..492647382ad0 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c @@ -9,7 +9,6 @@ * more details. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 93def94c1b32..34dba0ba545d 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -9,7 +9,6 @@ * more details. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> @@ -501,8 +500,11 @@ static int michael_mic(struct ieee80211_tkip_data *tkey, u8 * key, u8 * hdr, static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) { struct ieee80211_hdr_4addr *hdr11; + u16 stype; hdr11 = (struct ieee80211_hdr_4addr *)skb->data; + stype = WLAN_FC_GET_STYPE(le16_to_cpu(hdr11->frame_ctl)); + switch (le16_to_cpu(hdr11->frame_ctl) & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { case IEEE80211_FCTL_TODS: @@ -523,7 +525,13 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) break; } - hdr[12] = 0; /* priority */ + if (stype & IEEE80211_STYPE_QOS_DATA) { + const struct ieee80211_hdr_3addrqos *qoshdr = + (struct ieee80211_hdr_3addrqos *)skb->data; + hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID; + } else + hdr[12] = 0; /* priority */ + hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ } diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 649e581fa565..c5a87724aabe 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -9,7 +9,6 @@ * more details. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c index 192243ab35ed..305a09de85a5 100644 --- a/net/ieee80211/ieee80211_geo.c +++ b/net/ieee80211/ieee80211_geo.c @@ -24,7 +24,6 @@ ******************************************************************************/ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/in6.h> diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 2cb84d84f671..13b1e5fff7e4 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -31,7 +31,6 @@ *******************************************************************************/ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/in6.h> diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 604b7b0097bc..72d4d4e04d42 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -14,7 +14,6 @@ */ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/in6.h> @@ -405,9 +404,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, #endif if (ieee->iw_mode == IW_MODE_MONITOR) { - ieee80211_monitor_rx(ieee, skb, rx_stats); stats->rx_packets++; stats->rx_bytes += skb->len; + ieee80211_monitor_rx(ieee, skb, rx_stats); return 1; } @@ -1692,8 +1691,8 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee, WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); - IEEE80211_WARNING("%s: IEEE80211_REASSOC_REQ received\n", - ieee->dev->name); + IEEE80211_DEBUG_MGMT("%s: IEEE80211_REASSOC_REQ received\n", + ieee->dev->name); if (ieee->handle_reassoc_request != NULL) ieee->handle_reassoc_request(ieee->dev, (struct ieee80211_reassoc_request *) @@ -1705,8 +1704,8 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee, WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); - IEEE80211_WARNING("%s: IEEE80211_ASSOC_REQ received\n", - ieee->dev->name); + IEEE80211_DEBUG_MGMT("%s: IEEE80211_ASSOC_REQ received\n", + ieee->dev->name); if (ieee->handle_assoc_request != NULL) ieee->handle_assoc_request(ieee->dev); break; @@ -1722,10 +1721,10 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee, IEEE80211_DEBUG_MGMT("received UNKNOWN (%d)\n", WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); - IEEE80211_WARNING("%s: Unknown management packet: %d\n", - ieee->dev->name, - WLAN_FC_GET_STYPE(le16_to_cpu - (header->frame_ctl))); + IEEE80211_DEBUG_MGMT("%s: Unknown management packet: %d\n", + ieee->dev->name, + WLAN_FC_GET_STYPE(le16_to_cpu + (header->frame_ctl))); break; } } diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index 8b4332f53394..bf042139c7ab 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -24,7 +24,6 @@ ******************************************************************************/ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/if_arp.h> #include <linux/in6.h> @@ -220,13 +219,43 @@ static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, return txb; } +static int ieee80211_classify(struct sk_buff *skb) +{ + struct ethhdr *eth; + struct iphdr *ip; + + eth = (struct ethhdr *)skb->data; + if (eth->h_proto != __constant_htons(ETH_P_IP)) + return 0; + + ip = skb->nh.iph; + switch (ip->tos & 0xfc) { + case 0x20: + return 2; + case 0x40: + return 1; + case 0x60: + return 3; + case 0x80: + return 4; + case 0xa0: + return 5; + case 0xc0: + return 6; + case 0xe0: + return 7; + default: + return 0; + } +} + /* Incoming skb is converted to a txb which consists of * a block of 802.11 fragment packets (stored as skbs) */ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_device *ieee = netdev_priv(dev); struct ieee80211_txb *txb = NULL; - struct ieee80211_hdr_3addr *frag_hdr; + struct ieee80211_hdr_3addrqos *frag_hdr; int i, bytes_per_frag, nr_frags, bytes_last_frag, frag_size, rts_required; unsigned long flags; @@ -234,9 +263,10 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) int ether_type, encrypt, host_encrypt, host_encrypt_msdu, host_build_iv; int bytes, fc, hdr_len; struct sk_buff *skb_frag; - struct ieee80211_hdr_3addr header = { /* Ensure zero initialized */ + struct ieee80211_hdr_3addrqos header = {/* Ensure zero initialized */ .duration_id = 0, - .seq_ctl = 0 + .seq_ctl = 0, + .qos_ctl = 0 }; u8 dest[ETH_ALEN], src[ETH_ALEN]; struct ieee80211_crypt_data *crypt; @@ -282,12 +312,6 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) memcpy(dest, skb->data, ETH_ALEN); memcpy(src, skb->data + ETH_ALEN, ETH_ALEN); - /* Advance the SKB to the start of the payload */ - skb_pull(skb, sizeof(struct ethhdr)); - - /* Determine total amount of storage required for TXB packets */ - bytes = skb->len + SNAP_SIZE + sizeof(u16); - if (host_encrypt || host_build_iv) fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA | IEEE80211_FCTL_PROTECTED; @@ -306,9 +330,23 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) memcpy(header.addr2, src, ETH_ALEN); memcpy(header.addr3, ieee->bssid, ETH_ALEN); } - header.frame_ctl = cpu_to_le16(fc); hdr_len = IEEE80211_3ADDR_LEN; + if (ieee->is_qos_active && ieee->is_qos_active(dev, skb)) { + fc |= IEEE80211_STYPE_QOS_DATA; + hdr_len += 2; + + skb->priority = ieee80211_classify(skb); + header.qos_ctl |= skb->priority & IEEE80211_QCTL_TID; + } + header.frame_ctl = cpu_to_le16(fc); + + /* Advance the SKB to the start of the payload */ + skb_pull(skb, sizeof(struct ethhdr)); + + /* Determine total amount of storage required for TXB packets */ + bytes = skb->len + SNAP_SIZE + sizeof(u16); + /* Encrypt msdu first on the whole data packet. */ if ((host_encrypt || host_encrypt_msdu) && crypt && crypt->ops && crypt->ops->encrypt_msdu) { @@ -402,7 +440,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) if (rts_required) { skb_frag = txb->fragments[0]; frag_hdr = - (struct ieee80211_hdr_3addr *)skb_put(skb_frag, hdr_len); + (struct ieee80211_hdr_3addrqos *)skb_put(skb_frag, hdr_len); /* * Set header frame_ctl to the RTS. @@ -433,7 +471,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) crypt->ops->extra_mpdu_prefix_len); frag_hdr = - (struct ieee80211_hdr_3addr *)skb_put(skb_frag, hdr_len); + (struct ieee80211_hdr_3addrqos *)skb_put(skb_frag, hdr_len); memcpy(frag_hdr, &header, hdr_len); /* If this is not the last fragment, then add the MOREFRAGS @@ -516,16 +554,23 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) /* Incoming 802.11 strucure is converted to a TXB * a block of 802.11 fragment packets (stored as skbs) */ int ieee80211_tx_frame(struct ieee80211_device *ieee, - struct ieee80211_hdr *frame, int len) + struct ieee80211_hdr *frame, int hdr_len, int total_len, + int encrypt_mpdu) { struct ieee80211_txb *txb = NULL; unsigned long flags; struct net_device_stats *stats = &ieee->stats; struct sk_buff *skb_frag; int priority = -1; + int fraglen = total_len; + int headroom = ieee->tx_headroom; + struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx]; spin_lock_irqsave(&ieee->lock, flags); + if (encrypt_mpdu && (!ieee->sec.encrypt || !crypt)) + encrypt_mpdu = 0; + /* If there is no driver handler to take the TXB, dont' bother * creating it... */ if (!ieee->hard_start_xmit) { @@ -533,32 +578,45 @@ int ieee80211_tx_frame(struct ieee80211_device *ieee, goto success; } - if (unlikely(len < 24)) { + if (unlikely(total_len < 24)) { printk(KERN_WARNING "%s: skb too small (%d).\n", - ieee->dev->name, len); + ieee->dev->name, total_len); goto success; } + if (encrypt_mpdu) { + frame->frame_ctl |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + fraglen += crypt->ops->extra_mpdu_prefix_len + + crypt->ops->extra_mpdu_postfix_len; + headroom += crypt->ops->extra_mpdu_prefix_len; + } + /* When we allocate the TXB we allocate enough space for the reserve * and full fragment bytes (bytes_per_frag doesn't include prefix, * postfix, header, FCS, etc.) */ - txb = ieee80211_alloc_txb(1, len, ieee->tx_headroom, GFP_ATOMIC); + txb = ieee80211_alloc_txb(1, fraglen, headroom, GFP_ATOMIC); if (unlikely(!txb)) { printk(KERN_WARNING "%s: Could not allocate TXB\n", ieee->dev->name); goto failed; } txb->encrypted = 0; - txb->payload_size = len; + txb->payload_size = fraglen; skb_frag = txb->fragments[0]; - memcpy(skb_put(skb_frag, len), frame, len); + memcpy(skb_put(skb_frag, total_len), frame, total_len); if (ieee->config & (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) skb_put(skb_frag, 4); + /* To avoid overcomplicating things, we do the corner-case frame + * encryption in software. The only real situation where encryption is + * needed here is during software-based shared key authentication. */ + if (encrypt_mpdu) + ieee80211_encrypt_fragment(ieee, skb_frag, hdr_len); + success: spin_unlock_irqrestore(&ieee->lock, flags); diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index b885fd189403..a78c4f845f66 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -50,7 +50,8 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, char *p; struct iw_event iwe; int i, j; - u8 max_rate, rate; + char *current_val; /* For rates */ + u8 rate; /* First entry *MUST* be the AP MAC address */ iwe.cmd = SIOCGIWAP; @@ -107,9 +108,13 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, start = iwe_stream_add_point(start, stop, &iwe, network->ssid); /* Add basic and extended rates */ - max_rate = 0; - p = custom; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), " Rates (Mb/s): "); + /* Rate : stuffing multiple values in a single event require a bit + * more of magic - Jean II */ + current_val = start + IW_EV_LCP_LEN; + iwe.cmd = SIOCGIWRATE; + /* Those two flags are ignored... */ + iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; + for (i = 0, j = 0; i < network->rates_len;) { if (j < network->rates_ex_len && ((network->rates_ex[j] & 0x7F) < @@ -117,28 +122,21 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, rate = network->rates_ex[j++] & 0x7F; else rate = network->rates[i++] & 0x7F; - if (rate > max_rate) - max_rate = rate; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), - "%d%s ", rate >> 1, (rate & 1) ? ".5" : ""); + /* Bit rate given in 500 kb/s units (+ 0x80) */ + iwe.u.bitrate.value = ((rate & 0x7f) * 500000); + /* Add new value to event */ + current_val = iwe_stream_add_value(start, current_val, stop, &iwe, IW_EV_PARAM_LEN); } for (; j < network->rates_ex_len; j++) { rate = network->rates_ex[j] & 0x7F; - p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), - "%d%s ", rate >> 1, (rate & 1) ? ".5" : ""); - if (rate > max_rate) - max_rate = rate; + /* Bit rate given in 500 kb/s units (+ 0x80) */ + iwe.u.bitrate.value = ((rate & 0x7f) * 500000); + /* Add new value to event */ + current_val = iwe_stream_add_value(start, current_val, stop, &iwe, IW_EV_PARAM_LEN); } - - iwe.cmd = SIOCGIWRATE; - iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; - iwe.u.bitrate.value = max_rate * 500000; - start = iwe_stream_add_event(start, stop, &iwe, IW_EV_PARAM_LEN); - - iwe.cmd = IWEVCUSTOM; - iwe.u.data.length = p - custom; - if (iwe.u.data.length) - start = iwe_stream_add_point(start, stop, &iwe, custom); + /* Check if we added any rate */ + if((current_val - start) > IW_EV_LCP_LEN) + start = current_val; /* Add quality statistics */ iwe.cmd = IWEVQUAL; @@ -505,7 +503,7 @@ int ieee80211_wx_get_encode(struct ieee80211_device *ieee, len = sec->key_sizes[key]; memcpy(keybuf, sec->keys[key], len); - erq->length = (len >= 0 ? len : 0); + erq->length = len; erq->flags |= IW_ENCODE_ENABLED; if (ieee->open_wep) diff --git a/net/ieee80211/softmac/Kconfig b/net/ieee80211/softmac/Kconfig index f2a27cc6ecb1..2811651cb134 100644 --- a/net/ieee80211/softmac/Kconfig +++ b/net/ieee80211/softmac/Kconfig @@ -2,6 +2,7 @@ config IEEE80211_SOFTMAC tristate "Software MAC add-on to the IEEE 802.11 networking stack" depends on IEEE80211 && EXPERIMENTAL select WIRELESS_EXT + select IEEE80211_CRYPT_WEP ---help--- This option enables the hardware independent software MAC addon for the IEEE 802.11 networking stack. diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index 57ea9f6f465c..44215ce64d4e 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -47,9 +47,7 @@ ieee80211softmac_assoc(struct ieee80211softmac_device *mac, struct ieee80211soft dprintk(KERN_INFO PFX "sent association request!\n"); - /* Change the state to associating */ spin_lock_irqsave(&mac->lock, flags); - mac->associnfo.associating = 1; mac->associated = 0; /* just to make sure */ /* Set a timer for timeout */ @@ -63,6 +61,7 @@ void ieee80211softmac_assoc_timeout(void *d) { struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d; + struct ieee80211softmac_network *n; unsigned long flags; spin_lock_irqsave(&mac->lock, flags); @@ -75,58 +74,60 @@ ieee80211softmac_assoc_timeout(void *d) mac->associnfo.associating = 0; mac->associnfo.bssvalid = 0; mac->associated = 0; + + n = ieee80211softmac_get_network_by_bssid_locked(mac, mac->associnfo.bssid); spin_unlock_irqrestore(&mac->lock, flags); dprintk(KERN_INFO PFX "assoc request timed out!\n"); - /* FIXME: we need to know the network here. that requires a bit of restructuring */ - ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_TIMEOUT, NULL); + ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_TIMEOUT, n); } -/* Sends out a disassociation request to the desired AP */ -static void -ieee80211softmac_disassoc(struct ieee80211softmac_device *mac, u16 reason) +void +ieee80211softmac_disassoc(struct ieee80211softmac_device *mac) { unsigned long flags; + + spin_lock_irqsave(&mac->lock, flags); + if (mac->associnfo.associating) + cancel_delayed_work(&mac->associnfo.timeout); + + netif_carrier_off(mac->dev); + + mac->associated = 0; + mac->associnfo.bssvalid = 0; + mac->associnfo.associating = 0; + ieee80211softmac_init_txrates(mac); + ieee80211softmac_call_events_locked(mac, IEEE80211SOFTMAC_EVENT_DISASSOCIATED, NULL); + spin_unlock_irqrestore(&mac->lock, flags); +} + +/* Sends out a disassociation request to the desired AP */ +void +ieee80211softmac_send_disassoc_req(struct ieee80211softmac_device *mac, u16 reason) +{ struct ieee80211softmac_network *found; if (mac->associnfo.bssvalid && mac->associated) { found = ieee80211softmac_get_network_by_bssid(mac, mac->associnfo.bssid); if (found) ieee80211softmac_send_mgt_frame(mac, found, IEEE80211_STYPE_DISASSOC, reason); - } else if (mac->associnfo.associating) { - cancel_delayed_work(&mac->associnfo.timeout); } - /* Change our state */ - spin_lock_irqsave(&mac->lock, flags); - /* Do NOT clear bssvalid as that will break ieee80211softmac_assoc_work! */ - mac->associated = 0; - mac->associnfo.associating = 0; - ieee80211softmac_call_events_locked(mac, IEEE80211SOFTMAC_EVENT_DISASSOCIATED, NULL); - spin_unlock_irqrestore(&mac->lock, flags); + ieee80211softmac_disassoc(mac); } static inline int we_support_all_basic_rates(struct ieee80211softmac_device *mac, u8 *from, u8 from_len) { - int idx, search, found; - u8 rate, search_rate; + int idx; + u8 rate; for (idx = 0; idx < (from_len); idx++) { rate = (from)[idx]; if (!(rate & IEEE80211_BASIC_RATE_MASK)) continue; - found = 0; rate &= ~IEEE80211_BASIC_RATE_MASK; - for (search = 0; search < mac->ratesinfo.count; search++) { - search_rate = mac->ratesinfo.rates[search]; - search_rate &= ~IEEE80211_BASIC_RATE_MASK; - if (rate == search_rate) { - found = 1; - break; - } - } - if (!found) + if (!ieee80211softmac_ratesinfo_rate_supported(&mac->ratesinfo, rate)) return 0; } return 1; @@ -163,12 +164,28 @@ network_matches_request(struct ieee80211softmac_device *mac, struct ieee80211_ne } static void -ieee80211softmac_assoc_notify(struct net_device *dev, void *context) +ieee80211softmac_assoc_notify_scan(struct net_device *dev, int event_type, void *context) { struct ieee80211softmac_device *mac = ieee80211_priv(dev); ieee80211softmac_assoc_work((void*)mac); } +static void +ieee80211softmac_assoc_notify_auth(struct net_device *dev, int event_type, void *context) +{ + struct ieee80211softmac_device *mac = ieee80211_priv(dev); + + switch (event_type) { + case IEEE80211SOFTMAC_EVENT_AUTHENTICATED: + ieee80211softmac_assoc_work((void*)mac); + break; + case IEEE80211SOFTMAC_EVENT_AUTH_FAILED: + case IEEE80211SOFTMAC_EVENT_AUTH_TIMEOUT: + ieee80211softmac_disassoc(mac); + break; + } +} + /* This function is called to handle userspace requests (asynchronously) */ void ieee80211softmac_assoc_work(void *d) @@ -176,14 +193,22 @@ ieee80211softmac_assoc_work(void *d) struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d; struct ieee80211softmac_network *found = NULL; struct ieee80211_network *net = NULL, *best = NULL; + int bssvalid; unsigned long flags; - + + /* ieee80211_disassoc might clear this */ + bssvalid = mac->associnfo.bssvalid; + /* meh */ if (mac->associated) - ieee80211softmac_disassoc(mac, WLAN_REASON_DISASSOC_STA_HAS_LEFT); + ieee80211softmac_send_disassoc_req(mac, WLAN_REASON_DISASSOC_STA_HAS_LEFT); + + spin_lock_irqsave(&mac->lock, flags); + mac->associnfo.associating = 1; + spin_unlock_irqrestore(&mac->lock, flags); /* try to find the requested network in our list, if we found one already */ - if (mac->associnfo.bssvalid || mac->associnfo.bssfixed) + if (bssvalid || mac->associnfo.bssfixed) found = ieee80211softmac_get_network_by_bssid(mac, mac->associnfo.bssid); /* Search the ieee80211 networks for this network if we didn't find it by bssid, @@ -244,7 +269,7 @@ ieee80211softmac_assoc_work(void *d) * Maybe we can hope to have more memory after scanning finishes ;) */ dprintk(KERN_INFO PFX "Associate: Scanning for networks first.\n"); - ieee80211softmac_notify(mac->dev, IEEE80211SOFTMAC_EVENT_SCAN_FINISHED, ieee80211softmac_assoc_notify, NULL); + ieee80211softmac_notify(mac->dev, IEEE80211SOFTMAC_EVENT_SCAN_FINISHED, ieee80211softmac_assoc_notify_scan, NULL); if (ieee80211softmac_start_scan(mac)) dprintk(KERN_INFO PFX "Associate: failed to initiate scan. Is device up?\n"); return; @@ -274,19 +299,32 @@ ieee80211softmac_assoc_work(void *d) memcpy(mac->associnfo.associate_essid.data, found->essid.data, IW_ESSID_MAX_SIZE + 1); /* we found a network! authenticate (if necessary) and associate to it. */ - if (!found->authenticated) { + if (found->authenticating) { + dprintk(KERN_INFO PFX "Already requested authentication, waiting...\n"); + if(!mac->associnfo.assoc_wait) { + mac->associnfo.assoc_wait = 1; + ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify_auth, NULL, GFP_KERNEL); + } + return; + } + if (!found->authenticated && !found->authenticating) { /* This relies on the fact that _auth_req only queues the work, * otherwise adding the notification would be racy. */ if (!ieee80211softmac_auth_req(mac, found)) { - dprintk(KERN_INFO PFX "cannot associate without being authenticated, requested authentication\n"); - ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify, NULL, GFP_KERNEL); + if(!mac->associnfo.assoc_wait) { + dprintk(KERN_INFO PFX "Cannot associate without being authenticated, requested authentication\n"); + mac->associnfo.assoc_wait = 1; + ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify_auth, NULL, GFP_KERNEL); + } } else { printkl(KERN_WARNING PFX "Not authenticated, but requesting authentication failed. Giving up to associate\n"); + mac->associnfo.assoc_wait = 0; ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_FAILED, found); } return; } /* finally! now we can start associating */ + mac->associnfo.assoc_wait = 0; ieee80211softmac_assoc(mac, found); } @@ -297,6 +335,9 @@ ieee80211softmac_associated(struct ieee80211softmac_device *mac, struct ieee80211softmac_network *net) { mac->associnfo.associating = 0; + mac->associnfo.supported_rates = net->supported_rates; + ieee80211softmac_recalc_txrates(mac); + mac->associated = 1; if (mac->set_bssid_filter) mac->set_bssid_filter(mac->dev, net->bssid); @@ -380,7 +421,6 @@ ieee80211softmac_handle_disassoc(struct net_device * dev, struct ieee80211_disassoc *disassoc) { struct ieee80211softmac_device *mac = ieee80211_priv(dev); - unsigned long flags; if (unlikely(!mac->running)) return -ENODEV; @@ -392,14 +432,11 @@ ieee80211softmac_handle_disassoc(struct net_device * dev, return 0; dprintk(KERN_INFO PFX "got disassoc frame\n"); - netif_carrier_off(dev); - spin_lock_irqsave(&mac->lock, flags); - mac->associnfo.bssvalid = 0; - mac->associated = 0; - ieee80211softmac_call_events_locked(mac, IEEE80211SOFTMAC_EVENT_DISASSOCIATED, NULL); + ieee80211softmac_disassoc(mac); + + /* try to reassociate */ schedule_work(&mac->associnfo.work); - spin_unlock_irqrestore(&mac->lock, flags); - + return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c index 06e332624665..ebc33ca6e692 100644 --- a/net/ieee80211/softmac/ieee80211softmac_auth.c +++ b/net/ieee80211/softmac/ieee80211softmac_auth.c @@ -36,8 +36,9 @@ ieee80211softmac_auth_req(struct ieee80211softmac_device *mac, struct ieee80211softmac_auth_queue_item *auth; unsigned long flags; - if (net->authenticating) + if (net->authenticating || net->authenticated) return 0; + net->authenticating = 1; /* Add the network if it's not already added */ ieee80211softmac_add_network(mac, net); @@ -92,7 +93,6 @@ ieee80211softmac_auth_queue(void *data) return; } net->authenticated = 0; - net->authenticating = 1; /* add a timeout call so we eventually give up waiting for an auth reply */ schedule_delayed_work(&auth->work, IEEE80211SOFTMAC_AUTH_TIMEOUT); auth->retry--; @@ -107,6 +107,7 @@ ieee80211softmac_auth_queue(void *data) printkl(KERN_WARNING PFX "Authentication timed out with "MAC_FMT"\n", MAC_ARG(net->bssid)); /* Remove this item from the queue */ spin_lock_irqsave(&mac->lock, flags); + net->authenticating = 0; ieee80211softmac_call_events_locked(mac, IEEE80211SOFTMAC_EVENT_AUTH_TIMEOUT, net); cancel_delayed_work(&auth->work); /* just to make sure... */ list_del(&auth->list); @@ -212,13 +213,13 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) aq->state = IEEE80211SOFTMAC_AUTH_SHARED_RESPONSE; spin_unlock_irqrestore(&mac->lock, flags); - /* Switch to correct channel for this network */ - mac->set_channel(mac->dev, net->channel); - - /* Send our response (How to encrypt?) */ + /* Send our response */ ieee80211softmac_send_mgt_frame(mac, aq->net, IEEE80211_STYPE_AUTH, aq->state); - break; + return 0; case IEEE80211SOFTMAC_AUTH_SHARED_PASS: + kfree(net->challenge); + net->challenge = NULL; + net->challenge_len = 0; /* Check the status code of the response */ switch(auth->status) { case WLAN_STATUS_SUCCESS: @@ -229,6 +230,7 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) spin_unlock_irqrestore(&mac->lock, flags); printkl(KERN_NOTICE PFX "Shared Key Authentication completed with "MAC_FMT"\n", MAC_ARG(net->bssid)); + ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_AUTHENTICATED, net); break; default: printkl(KERN_NOTICE PFX "Shared Key Authentication with "MAC_FMT" failed, error code: %i\n", @@ -279,6 +281,9 @@ ieee80211softmac_deauth_from_net(struct ieee80211softmac_device *mac, struct list_head *list_ptr; unsigned long flags; + /* deauthentication implies disassociation */ + ieee80211softmac_disassoc(mac); + /* Lock and reset status flags */ spin_lock_irqsave(&mac->lock, flags); net->authenticating = 0; diff --git a/net/ieee80211/softmac/ieee80211softmac_event.c b/net/ieee80211/softmac/ieee80211softmac_event.c index 8cc8f3f0f8e7..f34fa2ef666b 100644 --- a/net/ieee80211/softmac/ieee80211softmac_event.c +++ b/net/ieee80211/softmac/ieee80211softmac_event.c @@ -38,7 +38,8 @@ * The event context is private and can only be used from * within this module. Its meaning varies with the event * type: - * SCAN_FINISHED: no special meaning + * SCAN_FINISHED, + * DISASSOCIATED: NULL * ASSOCIATED, * ASSOCIATE_FAILED, * ASSOCIATE_TIMEOUT, @@ -59,15 +60,15 @@ */ static char *event_descriptions[IEEE80211SOFTMAC_EVENT_LAST+1] = { - "scan finished", - "associated", + NULL, /* scan finished */ + NULL, /* associated */ "associating failed", "associating timed out", "authenticated", "authenticating failed", "authenticating timed out", "associating failed because no suitable network was found", - "disassociated", + NULL, /* disassociated */ }; @@ -77,7 +78,7 @@ ieee80211softmac_notify_callback(void *d) struct ieee80211softmac_event event = *(struct ieee80211softmac_event*) d; kfree(d); - event.fun(event.mac->dev, event.context); + event.fun(event.mac->dev, event.event_type, event.context); } int @@ -136,30 +137,24 @@ ieee80211softmac_call_events_locked(struct ieee80211softmac_device *mac, int eve int we_event; char *msg = NULL; + memset(&wrqu, '\0', sizeof (union iwreq_data)); + switch(event) { case IEEE80211SOFTMAC_EVENT_ASSOCIATED: network = (struct ieee80211softmac_network *)event_ctx; - wrqu.data.length = 0; - wrqu.data.flags = 0; memcpy(wrqu.ap_addr.sa_data, &network->bssid[0], ETH_ALEN); - wrqu.ap_addr.sa_family = ARPHRD_ETHER; - we_event = SIOCGIWAP; - break; + /* fall through */ case IEEE80211SOFTMAC_EVENT_DISASSOCIATED: - wrqu.data.length = 0; - wrqu.data.flags = 0; - memset(&wrqu, '\0', sizeof (union iwreq_data)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; we_event = SIOCGIWAP; break; case IEEE80211SOFTMAC_EVENT_SCAN_FINISHED: - wrqu.data.length = 0; - wrqu.data.flags = 0; - memset(&wrqu, '\0', sizeof (union iwreq_data)); we_event = SIOCGIWSCAN; break; default: msg = event_descriptions[event]; + if (!msg) + msg = "SOFTMAC EVENT BUG"; wrqu.data.length = strlen(msg); we_event = IWEVCUSTOM; break; @@ -172,6 +167,9 @@ ieee80211softmac_call_events_locked(struct ieee80211softmac_device *mac, int eve if ((eventptr->event_type == event || eventptr->event_type == -1) && (eventptr->event_context == NULL || eventptr->event_context == event_ctx)) { list_del(&eventptr->list); + /* User may have subscribed to ANY event, so + * we tell them which event triggered it. */ + eventptr->event_type = event; schedule_work(&eventptr->work); } } diff --git a/net/ieee80211/softmac/ieee80211softmac_io.c b/net/ieee80211/softmac/ieee80211softmac_io.c index cc6cd56c85b1..8cc8b20f5cda 100644 --- a/net/ieee80211/softmac/ieee80211softmac_io.c +++ b/net/ieee80211/softmac/ieee80211softmac_io.c @@ -149,6 +149,56 @@ ieee80211softmac_hdr_3addr(struct ieee80211softmac_device *mac, * shouldn't the sequence number be in ieee80211? */ } +static u16 +ieee80211softmac_capabilities(struct ieee80211softmac_device *mac, + struct ieee80211softmac_network *net) +{ + u16 capability = 0; + + /* ESS and IBSS bits are set according to the current mode */ + switch (mac->ieee->iw_mode) { + case IW_MODE_INFRA: + capability = cpu_to_le16(WLAN_CAPABILITY_ESS); + break; + case IW_MODE_ADHOC: + capability = cpu_to_le16(WLAN_CAPABILITY_IBSS); + break; + case IW_MODE_AUTO: + capability = net->capabilities & + (WLAN_CAPABILITY_ESS|WLAN_CAPABILITY_IBSS); + break; + default: + /* bleh. we don't ever go to these modes */ + printk(KERN_ERR PFX "invalid iw_mode!\n"); + break; + } + + /* CF Pollable / CF Poll Request */ + /* Needs to be implemented, for now, the 0's == not supported */ + + /* Privacy Bit */ + capability |= mac->ieee->sec.level ? + cpu_to_le16(WLAN_CAPABILITY_PRIVACY) : 0; + + /* Short Preamble */ + /* Always supported: we probably won't ever be powering devices which + * dont support this... */ + capability |= WLAN_CAPABILITY_SHORT_PREAMBLE; + + /* PBCC */ + /* Not widely used */ + + /* Channel Agility */ + /* Not widely used */ + + /* Short Slot */ + /* Will be implemented later */ + + /* DSSS-OFDM */ + /* Not widely used */ + + return capability; +} /***************************************************************************** * Create Management packets @@ -179,27 +229,9 @@ ieee80211softmac_assoc_req(struct ieee80211_assoc_request **pkt, return 0; ieee80211softmac_hdr_3addr(mac, &((*pkt)->header), IEEE80211_STYPE_ASSOC_REQ, net->bssid, net->bssid); - /* Fill in capability Info */ - switch (mac->ieee->iw_mode) { - case IW_MODE_INFRA: - (*pkt)->capability = cpu_to_le16(WLAN_CAPABILITY_ESS); - break; - case IW_MODE_ADHOC: - (*pkt)->capability = cpu_to_le16(WLAN_CAPABILITY_IBSS); - break; - case IW_MODE_AUTO: - (*pkt)->capability = net->capabilities & (WLAN_CAPABILITY_ESS|WLAN_CAPABILITY_IBSS); - break; - default: - /* bleh. we don't ever go to these modes */ - printk(KERN_ERR PFX "invalid iw_mode!\n"); - break; - } - /* Need to add this - (*pkt)->capability |= mac->ieee->short_slot ? - cpu_to_le16(WLAN_CAPABILITY_SHORT_SLOT_TIME) : 0; - */ - (*pkt)->capability |= mac->ieee->sec.level ? cpu_to_le16(WLAN_CAPABILITY_PRIVACY) : 0; + /* Fill in the capabilities */ + (*pkt)->capability = ieee80211softmac_capabilities(mac, net); + /* Fill in Listen Interval (?) */ (*pkt)->listen_interval = cpu_to_le16(10); @@ -239,17 +271,9 @@ ieee80211softmac_reassoc_req(struct ieee80211_reassoc_request **pkt, return 0; ieee80211softmac_hdr_3addr(mac, &((*pkt)->header), IEEE80211_STYPE_REASSOC_REQ, net->bssid, net->bssid); - /* Fill in capability Info */ - (*pkt)->capability = mac->ieee->iw_mode == IW_MODE_MASTER ? - cpu_to_le16(WLAN_CAPABILITY_ESS) : - cpu_to_le16(WLAN_CAPABILITY_IBSS); - /* - (*pkt)->capability |= mac->ieee->short_slot ? - cpu_to_le16(WLAN_CAPABILITY_SHORT_SLOT_TIME) : 0; - */ - (*pkt)->capability |= mac->ieee->sec.level ? - cpu_to_le16(WLAN_CAPABILITY_PRIVACY) : 0; - + /* Fill in the capabilities */ + (*pkt)->capability = ieee80211softmac_capabilities(mac, net); + /* Fill in Listen Interval (?) */ (*pkt)->listen_interval = cpu_to_le16(10); /* Fill in the current AP MAC */ @@ -268,26 +292,27 @@ ieee80211softmac_reassoc_req(struct ieee80211_reassoc_request **pkt, static u32 ieee80211softmac_auth(struct ieee80211_auth **pkt, struct ieee80211softmac_device *mac, struct ieee80211softmac_network *net, - u16 transaction, u16 status) + u16 transaction, u16 status, int *encrypt_mpdu) { u8 *data; + int auth_mode = mac->ieee->sec.auth_mode; + int is_shared_response = (auth_mode == WLAN_AUTH_SHARED_KEY + && transaction == IEEE80211SOFTMAC_AUTH_SHARED_RESPONSE); + /* Allocate Packet */ (*pkt) = (struct ieee80211_auth *)ieee80211softmac_alloc_mgt( 2 + /* Auth Algorithm */ 2 + /* Auth Transaction Seq */ 2 + /* Status Code */ /* Challenge Text IE */ - mac->ieee->open_wep ? 0 : - 1 + 1 + WLAN_AUTH_CHALLENGE_LEN - ); + is_shared_response ? 0 : 1 + 1 + net->challenge_len + ); if (unlikely((*pkt) == NULL)) return 0; ieee80211softmac_hdr_3addr(mac, &((*pkt)->header), IEEE80211_STYPE_AUTH, net->bssid, net->bssid); /* Algorithm */ - (*pkt)->algorithm = mac->ieee->open_wep ? - cpu_to_le16(WLAN_AUTH_OPEN) : - cpu_to_le16(WLAN_AUTH_SHARED_KEY); + (*pkt)->algorithm = cpu_to_le16(auth_mode); /* Transaction */ (*pkt)->transaction = cpu_to_le16(transaction); /* Status */ @@ -295,18 +320,20 @@ ieee80211softmac_auth(struct ieee80211_auth **pkt, data = (u8 *)(*pkt)->info_element; /* Challenge Text */ - if(!mac->ieee->open_wep){ + if (is_shared_response) { *data = MFIE_TYPE_CHALLENGE; data++; /* Copy the challenge in */ - // *data = challenge length - // data += sizeof(u16); - // memcpy(data, challenge, challenge length); - // data += challenge length; - - /* Add the full size to the packet length */ - } + *data = net->challenge_len; + data++; + memcpy(data, net->challenge, net->challenge_len); + data += net->challenge_len; + + /* Make sure this frame gets encrypted with the shared key */ + *encrypt_mpdu = 1; + } else + *encrypt_mpdu = 0; /* Return the packet size */ return (data - (u8 *)(*pkt)); @@ -396,6 +423,7 @@ ieee80211softmac_send_mgt_frame(struct ieee80211softmac_device *mac, { void *pkt = NULL; u32 pkt_size = 0; + int encrypt_mpdu = 0; switch(type) { case IEEE80211_STYPE_ASSOC_REQ: @@ -405,7 +433,7 @@ ieee80211softmac_send_mgt_frame(struct ieee80211softmac_device *mac, pkt_size = ieee80211softmac_reassoc_req((struct ieee80211_reassoc_request **)(&pkt), mac, (struct ieee80211softmac_network *)ptrarg); break; case IEEE80211_STYPE_AUTH: - pkt_size = ieee80211softmac_auth((struct ieee80211_auth **)(&pkt), mac, (struct ieee80211softmac_network *)ptrarg, (u16)(arg & 0xFFFF), (u16) (arg >> 16)); + pkt_size = ieee80211softmac_auth((struct ieee80211_auth **)(&pkt), mac, (struct ieee80211softmac_network *)ptrarg, (u16)(arg & 0xFFFF), (u16) (arg >> 16), &encrypt_mpdu); break; case IEEE80211_STYPE_DISASSOC: case IEEE80211_STYPE_DEAUTH: @@ -434,52 +462,8 @@ ieee80211softmac_send_mgt_frame(struct ieee80211softmac_device *mac, * or get rid of it alltogether? * Does this work for you now? */ - ieee80211_tx_frame(mac->ieee, (struct ieee80211_hdr *)pkt, pkt_size); - - kfree(pkt); - return 0; -} - - -/* Create an rts/cts frame */ -static u32 -ieee80211softmac_rts_cts(struct ieee80211_hdr_2addr **pkt, - struct ieee80211softmac_device *mac, struct ieee80211softmac_network *net, - u32 type) -{ - /* Allocate Packet */ - (*pkt) = kmalloc(IEEE80211_2ADDR_LEN, GFP_ATOMIC); - memset(*pkt, 0, IEEE80211_2ADDR_LEN); - if((*pkt) == NULL) - return 0; - ieee80211softmac_hdr_2addr(mac, (*pkt), type, net->bssid); - return IEEE80211_2ADDR_LEN; -} - - -/* Sends a control packet */ -static int -ieee80211softmac_send_ctl_frame(struct ieee80211softmac_device *mac, - struct ieee80211softmac_network *net, u32 type, u32 arg) -{ - void *pkt = NULL; - u32 pkt_size = 0; - - switch(type) { - case IEEE80211_STYPE_RTS: - case IEEE80211_STYPE_CTS: - pkt_size = ieee80211softmac_rts_cts((struct ieee80211_hdr_2addr **)(&pkt), mac, net, type); - break; - default: - printkl(KERN_DEBUG PFX "Unsupported Control Frame type: %i\n", type); - return -EINVAL; - } - - if(pkt_size == 0) - return -ENOMEM; - - /* Send the packet to the ieee80211 layer for tx */ - ieee80211_tx_frame(mac->ieee, (struct ieee80211_hdr *) pkt, pkt_size); + ieee80211_tx_frame(mac->ieee, (struct ieee80211_hdr *)pkt, + IEEE80211_3ADDR_LEN, pkt_size, encrypt_mpdu); kfree(pkt); return 0; diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index 6252be2c0db9..4b2e57d12418 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -26,6 +26,7 @@ #include "ieee80211softmac_priv.h" #include <linux/sort.h> +#include <linux/etherdevice.h> struct net_device *alloc_ieee80211softmac(int sizeof_priv) { @@ -61,14 +62,6 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv) softmac->wait_for_scan = ieee80211softmac_wait_for_scan_implementation; softmac->stop_scan = ieee80211softmac_stop_scan_implementation; - //TODO: The mcast rate has to be assigned dynamically somewhere (in scanning, association. Not sure...) - // It has to be set to the highest rate all stations in the current network can handle. - softmac->txrates.mcast_rate = IEEE80211_CCK_RATE_1MB; - softmac->txrates.mcast_fallback = IEEE80211_CCK_RATE_1MB; - /* This is reassigned in ieee80211softmac_start to sane values. */ - softmac->txrates.default_rate = IEEE80211_CCK_RATE_1MB; - softmac->txrates.default_fallback = IEEE80211_CCK_RATE_1MB; - /* to start with, we can't send anything ... */ netif_carrier_off(dev); @@ -170,15 +163,82 @@ static void ieee80211softmac_start_check_rates(struct ieee80211softmac_device *m } } -void ieee80211softmac_start(struct net_device *dev) +int ieee80211softmac_ratesinfo_rate_supported(struct ieee80211softmac_ratesinfo *ri, u8 rate) +{ + int search; + u8 search_rate; + + for (search = 0; search < ri->count; search++) { + search_rate = ri->rates[search]; + search_rate &= ~IEEE80211_BASIC_RATE_MASK; + if (rate == search_rate) + return 1; + } + + return 0; +} + +/* Finds the highest rate which is: + * 1. Present in ri (optionally a basic rate) + * 2. Supported by the device + * 3. Less than or equal to the user-defined rate + */ +static u8 highest_supported_rate(struct ieee80211softmac_device *mac, + struct ieee80211softmac_ratesinfo *ri, int basic_only) +{ + u8 user_rate = mac->txrates.user_rate; + int i; + + if (ri->count == 0) { + dprintk(KERN_ERR PFX "empty ratesinfo?\n"); + return IEEE80211_CCK_RATE_1MB; + } + + for (i = ri->count - 1; i >= 0; i--) { + u8 rate = ri->rates[i]; + if (basic_only && !(rate & IEEE80211_BASIC_RATE_MASK)) + continue; + rate &= ~IEEE80211_BASIC_RATE_MASK; + if (rate > user_rate) + continue; + if (ieee80211softmac_ratesinfo_rate_supported(&mac->ratesinfo, rate)) + return rate; + } + + /* If we haven't found a suitable rate by now, just trust the user */ + return user_rate; +} + +void ieee80211softmac_recalc_txrates(struct ieee80211softmac_device *mac) +{ + struct ieee80211softmac_txrates *txrates = &mac->txrates; + struct ieee80211softmac_txrates oldrates; + u32 change = 0; + + if (mac->txrates_change) + oldrates = mac->txrates; + + change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; + txrates->default_rate = highest_supported_rate(mac, &mac->associnfo.supported_rates, 0); + + change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT_FBACK; + txrates->default_fallback = lower_rate(mac, txrates->default_rate); + + change |= IEEE80211SOFTMAC_TXRATECHG_MCAST; + txrates->mcast_rate = highest_supported_rate(mac, &mac->associnfo.supported_rates, 1); + + if (mac->txrates_change) + mac->txrates_change(mac->dev, change, &oldrates); + +} + +void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac) { - struct ieee80211softmac_device *mac = ieee80211_priv(dev); struct ieee80211_device *ieee = mac->ieee; u32 change = 0; + struct ieee80211softmac_txrates *txrates = &mac->txrates; struct ieee80211softmac_txrates oldrates; - ieee80211softmac_start_check_rates(mac); - /* TODO: We need some kind of state machine to lower the default rates * if we loose too many packets. */ @@ -193,22 +253,37 @@ void ieee80211softmac_start(struct net_device *dev) more reliable. Note similar logic in ieee80211softmac_wx_set_rate() */ if (ieee->modulation & IEEE80211_CCK_MODULATION) { - mac->txrates.default_rate = IEEE80211_CCK_RATE_11MB; - change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; - mac->txrates.default_fallback = IEEE80211_CCK_RATE_5MB; - change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT_FBACK; + txrates->user_rate = IEEE80211_CCK_RATE_11MB; } else if (ieee->modulation & IEEE80211_OFDM_MODULATION) { - mac->txrates.default_rate = IEEE80211_OFDM_RATE_54MB; - change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; - mac->txrates.default_fallback = IEEE80211_OFDM_RATE_24MB; - change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT_FBACK; + txrates->user_rate = IEEE80211_OFDM_RATE_54MB; } else assert(0); + + txrates->default_rate = IEEE80211_CCK_RATE_1MB; + change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; + + txrates->default_fallback = IEEE80211_CCK_RATE_1MB; + change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT_FBACK; + + txrates->mcast_rate = IEEE80211_CCK_RATE_1MB; + change |= IEEE80211SOFTMAC_TXRATECHG_MCAST; + + txrates->mgt_mcast_rate = IEEE80211_CCK_RATE_1MB; + change |= IEEE80211SOFTMAC_TXRATECHG_MGT_MCAST; + if (mac->txrates_change) - mac->txrates_change(dev, change, &oldrates); + mac->txrates_change(mac->dev, change, &oldrates); mac->running = 1; } + +void ieee80211softmac_start(struct net_device *dev) +{ + struct ieee80211softmac_device *mac = ieee80211_priv(dev); + + ieee80211softmac_start_check_rates(mac); + ieee80211softmac_init_txrates(mac); +} EXPORT_SYMBOL_GPL(ieee80211softmac_start); void ieee80211softmac_stop(struct net_device *dev) diff --git a/net/ieee80211/softmac/ieee80211softmac_priv.h b/net/ieee80211/softmac/ieee80211softmac_priv.h index 65d9816c8ecc..fa1f8e3acfc0 100644 --- a/net/ieee80211/softmac/ieee80211softmac_priv.h +++ b/net/ieee80211/softmac/ieee80211softmac_priv.h @@ -116,7 +116,10 @@ ieee80211softmac_get_network_by_essid(struct ieee80211softmac_device *mac, struct ieee80211softmac_essid *essid); /* Rates related */ +int ieee80211softmac_ratesinfo_rate_supported(struct ieee80211softmac_ratesinfo *ri, u8 rate); u8 ieee80211softmac_lower_rate_delta(struct ieee80211softmac_device *mac, u8 rate, int delta); +void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac); +void ieee80211softmac_recalc_txrates(struct ieee80211softmac_device *mac); static inline u8 lower_rate(struct ieee80211softmac_device *mac, u8 rate) { return ieee80211softmac_lower_rate_delta(mac, rate, 1); } @@ -150,6 +153,8 @@ int ieee80211softmac_handle_disassoc(struct net_device * dev, int ieee80211softmac_handle_reassoc_req(struct net_device * dev, struct ieee80211_reassoc_request * reassoc); void ieee80211softmac_assoc_timeout(void *d); +void ieee80211softmac_send_disassoc_req(struct ieee80211softmac_device *mac, u16 reason); +void ieee80211softmac_disassoc(struct ieee80211softmac_device *mac); /* some helper functions */ static inline int ieee80211softmac_scan_handlers_check_self(struct ieee80211softmac_device *sm) diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index 27edb2b5581a..75320b6842ab 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -70,12 +70,44 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev, char *extra) { struct ieee80211softmac_device *sm = ieee80211_priv(net_dev); + struct ieee80211softmac_network *n; + struct ieee80211softmac_auth_queue_item *authptr; int length = 0; unsigned long flags; - + + /* Check if we're already associating to this or another network + * If it's another network, cancel and start over with our new network + * If it's our network, ignore the change, we're already doing it! + */ + if((sm->associnfo.associating || sm->associated) && + (data->essid.flags && data->essid.length && extra)) { + /* Get the associating network */ + n = ieee80211softmac_get_network_by_bssid(sm, sm->associnfo.bssid); + if(n && n->essid.len == (data->essid.length - 1) && + !memcmp(n->essid.data, extra, n->essid.len)) { + dprintk(KERN_INFO PFX "Already associating or associated to "MAC_FMT"\n", + MAC_ARG(sm->associnfo.bssid)); + return 0; + } else { + dprintk(KERN_INFO PFX "Canceling existing associate request!\n"); + spin_lock_irqsave(&sm->lock,flags); + /* Cancel assoc work */ + cancel_delayed_work(&sm->associnfo.work); + /* We don't have to do this, but it's a little cleaner */ + list_for_each_entry(authptr, &sm->auth_queue, list) + cancel_delayed_work(&authptr->work); + sm->associnfo.bssvalid = 0; + sm->associnfo.bssfixed = 0; + spin_unlock_irqrestore(&sm->lock,flags); + flush_scheduled_work(); + } + } + + spin_lock_irqsave(&sm->lock, flags); - + sm->associnfo.static_essid = 0; + sm->associnfo.assoc_wait = 0; if (data->essid.flags && data->essid.length && extra /*required?*/) { length = min(data->essid.length - 1, IW_ESSID_MAX_SIZE); @@ -211,8 +243,8 @@ ieee80211softmac_wx_set_rate(struct net_device *net_dev, if (is_ofdm && !(ieee->modulation & IEEE80211_OFDM_MODULATION)) goto out_unlock; - mac->txrates.default_rate = rate; - mac->txrates.default_fallback = lower_rate(mac, rate); + mac->txrates.user_rate = rate; + ieee80211softmac_recalc_txrates(mac); err = 0; out_unlock: @@ -388,7 +420,7 @@ ieee80211softmac_wx_set_genie(struct net_device *dev, memcpy(mac->wpa.IE, extra, wrqu->data.length); dprintk(KERN_INFO PFX "generic IE set to "); for (i=0;i<wrqu->data.length;i++) - dprintk("%.2x", mac->wpa.IE[i]); + dprintk("%.2x", (u8)mac->wpa.IE[i]); dprintk("\n"); mac->wpa.IElen = wrqu->data.length; } else { @@ -431,3 +463,35 @@ ieee80211softmac_wx_get_genie(struct net_device *dev, } EXPORT_SYMBOL_GPL(ieee80211softmac_wx_get_genie); +int +ieee80211softmac_wx_set_mlme(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra) +{ + struct ieee80211softmac_device *mac = ieee80211_priv(dev); + struct iw_mlme *mlme = (struct iw_mlme *)extra; + u16 reason = cpu_to_le16(mlme->reason_code); + struct ieee80211softmac_network *net; + + if (memcmp(mac->associnfo.bssid, mlme->addr.sa_data, ETH_ALEN)) { + printk(KERN_DEBUG PFX "wx_set_mlme: requested operation on net we don't use\n"); + return -EINVAL; + } + + switch (mlme->cmd) { + case IW_MLME_DEAUTH: + net = ieee80211softmac_get_network_by_bssid_locked(mac, mlme->addr.sa_data); + if (!net) { + printk(KERN_DEBUG PFX "wx_set_mlme: we should know the net here...\n"); + return -EINVAL; + } + return ieee80211softmac_deauth_req(mac, net, reason); + case IW_MLME_DISASSOC: + ieee80211softmac_send_disassoc_req(mac, reason); + return 0; + default: + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL_GPL(ieee80211softmac_wx_set_mlme); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e40f75322377..8514106761b0 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -414,6 +414,24 @@ config INET_TUNNEL tristate default n +config INET_XFRM_MODE_TRANSPORT + tristate "IP: IPsec transport mode" + default y + select XFRM + ---help--- + Support for IPsec transport mode. + + If unsure, say Y. + +config INET_XFRM_MODE_TUNNEL + tristate "IP: IPsec tunnel mode" + default y + select XFRM + ---help--- + Support for IPsec tunnel mode. + + If unsure, say Y. + config INET_DIAG tristate "INET: socket monitoring interface" default y @@ -532,6 +550,28 @@ config TCP_CONG_SCALABLE properties, though is known to have fairness issues. See http://www-lce.eng.cam.ac.uk/~ctk21/scalable/ +config TCP_CONG_LP + tristate "TCP Low Priority" + depends on EXPERIMENTAL + default n + ---help--- + TCP Low Priority (TCP-LP), a distributed algorithm whose goal is + to utiliza only the excess network bandwidth as compared to the + ``fair share`` of bandwidth as targeted by TCP. + See http://www-ece.rice.edu/networks/TCP-LP/ + +config TCP_CONG_VENO + tristate "TCP Veno" + depends on EXPERIMENTAL + default n + ---help--- + TCP Veno is a sender-side only enhancement of TCP to obtain better + throughput over wireless networks. TCP Veno makes use of state + distinguishing to circumvent the difficult judgment of the packet loss + type. TCP Veno cuts down less congestion window in response to random + loss packets. + See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf + endmenu config TCP_CONG_BIC diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9ef50a0b9d2c..4878fc5be85f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -24,6 +24,8 @@ obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o +obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o +obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o @@ -34,6 +36,7 @@ obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o +obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o @@ -41,7 +44,9 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o +obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o +obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0a277453526b..c84a32070f8d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -68,6 +68,7 @@ */ #include <linux/config.h> +#include <linux/err.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -1096,6 +1097,96 @@ int inet_sk_rebuild_header(struct sock *sk) EXPORT_SYMBOL(inet_sk_rebuild_header); +static int inet_gso_send_check(struct sk_buff *skb) +{ + struct iphdr *iph; + struct net_protocol *ops; + int proto; + int ihl; + int err = -EINVAL; + + if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + goto out; + + iph = skb->nh.iph; + ihl = iph->ihl * 4; + if (ihl < sizeof(*iph)) + goto out; + + if (unlikely(!pskb_may_pull(skb, ihl))) + goto out; + + skb->h.raw = __skb_pull(skb, ihl); + iph = skb->nh.iph; + proto = iph->protocol & (MAX_INET_PROTOS - 1); + err = -EPROTONOSUPPORT; + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (likely(ops && ops->gso_send_check)) + err = ops->gso_send_check(skb); + rcu_read_unlock(); + +out: + return err; +} + +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct iphdr *iph; + struct net_protocol *ops; + int proto; + int ihl; + int id; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + 0))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + goto out; + + iph = skb->nh.iph; + ihl = iph->ihl * 4; + if (ihl < sizeof(*iph)) + goto out; + + if (unlikely(!pskb_may_pull(skb, ihl))) + goto out; + + skb->h.raw = __skb_pull(skb, ihl); + iph = skb->nh.iph; + id = ntohs(iph->id); + proto = iph->protocol & (MAX_INET_PROTOS - 1); + segs = ERR_PTR(-EPROTONOSUPPORT); + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (likely(ops && ops->gso_segment)) + segs = ops->gso_segment(skb, features); + rcu_read_unlock(); + + if (!segs || unlikely(IS_ERR(segs))) + goto out; + + skb = segs; + do { + iph = skb->nh.iph; + iph->id = htons(id++); + iph->tot_len = htons(skb->len - skb->mac_len); + iph->check = 0; + iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); + } while ((skb = skb->next)); + +out: + return segs; +} + #ifdef CONFIG_IP_MULTICAST static struct net_protocol igmp_protocol = { .handler = igmp_rcv, @@ -1105,6 +1196,8 @@ static struct net_protocol igmp_protocol = { static struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, .no_policy = 1, }; @@ -1150,6 +1243,8 @@ static int ipv4_proc_init(void); static struct packet_type ip_packet_type = { .type = __constant_htons(ETH_P_IP), .func = ip_rcv, + .gso_send_check = inet_gso_send_check, + .gso_segment = inet_gso_segment, }; static int __init inet_init(void) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index e2e4771fa4c6..8e748be36c5a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -1,4 +1,3 @@ -#include <linux/config.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -119,6 +118,7 @@ error: static int ah_input(struct xfrm_state *x, struct sk_buff *skb) { int ah_hlen; + int ihl; struct iphdr *iph; struct ip_auth_hdr *ah; struct ah_data *ahp; @@ -149,13 +149,14 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) ah = (struct ip_auth_hdr*)skb->data; iph = skb->nh.iph; - memcpy(work_buf, iph, iph->ihl*4); + ihl = skb->data - skb->nh.raw; + memcpy(work_buf, iph, ihl); iph->ttl = 0; iph->tos = 0; iph->frag_off = 0; iph->check = 0; - if (iph->ihl != 5) { + if (ihl > sizeof(*iph)) { u32 dummy; if (ip_clear_mutable_options(iph, &dummy)) goto out; @@ -164,7 +165,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) u8 auth_data[MAX_AH_AUTH_LEN]; memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); - skb_push(skb, skb->data - skb->nh.raw); + skb_push(skb, ihl); ahp->icv(ahp, skb, ah->auth_data); if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { x->stats.integrity_failed++; @@ -172,11 +173,8 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) } } ((struct iphdr*)work_buf)->protocol = ah->nexthdr; - skb->nh.raw = skb_pull(skb, ah_hlen); - memcpy(skb->nh.raw, work_buf, iph->ihl*4); - skb->nh.iph->tot_len = htons(skb->len); - skb_pull(skb, skb->nh.iph->ihl*4); - skb->h.raw = skb->data; + skb->h.raw = memcpy(skb->nh.raw += ah_hlen, work_buf, ihl); + __skb_pull(skb, ah_hlen + ihl); return 0; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4749d504c629..7b51b3bdb548 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -80,7 +80,6 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/capability.h> -#include <linux/config.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/errno.h> diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index c1b42b5257f8..ec5da4fbd9f4 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -11,7 +11,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/module.h> #include <linux/ip.h> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 54419b27686f..a7c65e9e5ec9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -27,7 +27,6 @@ * if no match found. */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 9d1881c07a32..4e112738b3fa 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1,4 +1,3 @@ -#include <linux/config.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -143,10 +142,9 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; int nfrags; - int encap_len = 0; + int ihl; u8 nexthdr[2]; struct scatterlist *sg; - u8 workbuf[60]; int padlen; if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr))) @@ -177,7 +175,6 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; esph = (struct ip_esp_hdr*)skb->data; - iph = skb->nh.iph; /* Get ivec. This can be wrong, check against another impls. */ if (esp->conf.ivlen) @@ -204,12 +201,12 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) /* ... check padding bits here. Silly. :-) */ + iph = skb->nh.iph; + ihl = iph->ihl * 4; + if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; - struct udphdr *uh; - - uh = (struct udphdr *)(iph + 1); - encap_len = (void*)esph - (void*)uh; + struct udphdr *uh = (void *)(skb->nh.raw + ihl); /* * 1) if the NAT-T peer's IP or port changed then @@ -246,11 +243,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) iph->protocol = nexthdr[1]; pskb_trim(skb, skb->len - alen - padlen - 2); - memcpy(workbuf, skb->nh.raw, iph->ihl*4); - skb->h.raw = skb_pull(skb, sizeof(struct ip_esp_hdr) + esp->conf.ivlen); - skb->nh.raw += encap_len + sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - memcpy(skb->nh.raw, workbuf, iph->ihl*4); - skb->nh.iph->tot_len = htons(skb->len); + skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - ihl; return 0; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cdde96390960..ba2a70745a63 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -15,7 +15,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -666,3 +665,4 @@ void __init ip_fib_init(void) } EXPORT_SYMBOL(inet_addr_type); +EXPORT_SYMBOL(ip_dev_find); diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index e2890ec8159e..3c1d32ad35f2 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -15,7 +15,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ec566f3e66c7..773b12ba4e3c 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -19,7 +19,6 @@ * Marc Boucher : routing by fwmark */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> @@ -458,13 +457,13 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (idx < s_idx) - continue; + goto next; if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, NLM_F_MULTI) < 0) break; +next: idx++; } rcu_read_unlock(); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 0f4145babb14..5f87533684d5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -15,7 +15,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 95a639f2e3db..23fb9d9768e3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -52,7 +52,6 @@ #define VERSION "0.407" -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> @@ -1253,8 +1252,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, */ if (!fa_head) { - fa_head = fib_insert_node(t, &err, key, plen); err = 0; + fa_head = fib_insert_node(t, &err, key, plen); if (err) goto out_free_new_fa; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2a0455911ee0..4c86ac3d882d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -64,7 +64,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/jiffies.h> @@ -730,7 +729,6 @@ out_err: static void icmp_redirect(struct sk_buff *skb) { struct iphdr *iph; - unsigned long ip; if (skb->len < sizeof(struct iphdr)) goto out_err; @@ -742,7 +740,6 @@ static void icmp_redirect(struct sk_buff *skb) goto out; iph = (struct iphdr *)skb->data; - ip = iph->daddr; switch (skb->h.icmph->code & 7) { case ICMP_REDIR_NET: @@ -752,7 +749,8 @@ static void icmp_redirect(struct sk_buff *skb) */ case ICMP_REDIR_HOST: case ICMP_REDIR_HOSTTOS: - ip_rt_redirect(skb->nh.iph->saddr, ip, skb->h.icmph->un.gateway, + ip_rt_redirect(skb->nh.iph->saddr, iph->daddr, + skb->h.icmph->un.gateway, iph->saddr, skb->dev); break; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d512239a1473..d299c8e547d6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -72,7 +72,6 @@ * Vinay Kulkarni */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -2361,7 +2360,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) } seq_printf(seq, - "\t\t\t\t%08lX %5d %d:%08lX\t\t%d\n", + "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n", im->multiaddr, im->users, im->tm_running, im->tm_running ? jiffies_to_clock_t(im->timer.expires-jiffies) : 0, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 9a01bb81f8bf..e50a1bfd7ccc 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -13,7 +13,6 @@ * 2 of the License, or(at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/jhash.h> diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 457db99c76df..8e7e41b66c79 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -11,7 +11,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/fcntl.h> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index ee9b5515b9ae..95fac5532994 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -13,7 +13,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/random.h> #include <linux/sched.h> diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 417f126c749e..cdd805344c61 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -8,7 +8,6 @@ * From code orinally in TCP */ -#include <linux/config.h> #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2160874ce7aa..03ff62ebcfeb 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -86,7 +86,7 @@ static struct inet_peer *peer_root = peer_avl_empty; static DEFINE_RWLOCK(peer_pool_lock); #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ -static volatile int peer_total; +static int peer_total; /* Exported for sysctl_net_ipv4. */ int inet_peer_threshold = 65536 + 128; /* start to throw entries more * aggressively at this stage */ diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 0923add122b4..a22d11d2911c 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -21,7 +21,6 @@ * Mike McLagan : Routing by source */ -#include <linux/config.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/sched.h> @@ -116,6 +115,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... */ + IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index da734c439179..b84b53a47526 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -23,7 +23,6 @@ */ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/mm.h> diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ab99bebdcdc8..6ff9b10d9563 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -11,7 +11,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/sched.h> diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c9026dbf4c93..184c78ca79e6 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -121,7 +121,6 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> -#include <linux/config.h> #include <linux/net.h> #include <linux/socket.h> @@ -429,6 +428,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; } + /* Remove any debris in the socket control block */ + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index cff9c3a72daf..7c9f9a6421b8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -53,7 +53,6 @@ #include <linux/mm.h> #include <linux/string.h> #include <linux/errno.h> -#include <linux/config.h> #include <linux/socket.h> #include <linux/sockios.h> @@ -210,8 +209,7 @@ static inline int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb->len > dst_mtu(skb->dst) && - !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) + if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) return ip_fragment(skb, ip_finish_output2); else return ip_finish_output2(skb); @@ -362,7 +360,7 @@ packet_routed: } ip_select_ident_more(iph, &rt->u.dst, sk, - (skb_shinfo(skb)->tso_segs ?: 1) - 1); + (skb_shinfo(skb)->gso_segs ?: 1) - 1); /* Add an IP checksum. */ ip_send_check(iph); @@ -410,6 +408,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) nf_bridge_get(to->nf_bridge); #endif #endif + skb_copy_secmark(to, from); } /* @@ -743,7 +742,8 @@ static inline int ip_ufo_append_data(struct sock *sk, (length - transhdrlen)); if (!err) { /* specify the length of each IP datagram fragment*/ - skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); + skb_shinfo(skb)->gso_size = mtu - fragheaderlen; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; __skb_queue_tail(&sk->sk_write_queue, skb); return 0; @@ -839,7 +839,7 @@ int ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) && + rt->u.dst.dev->features & NETIF_F_ALL_CSUM && !exthdrlen) csummode = CHECKSUM_HW; @@ -1086,14 +1086,16 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, inet->cork.length += size; if ((sk->sk_protocol == IPPROTO_UDP) && - (rt->u.dst.dev->features & NETIF_F_UFO)) - skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); + (rt->u.dst.dev->features & NETIF_F_UFO)) { + skb_shinfo(skb)->gso_size = mtu - fragheaderlen; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + } while (size > 0) { int i; - if (skb_shinfo(skb)->ufo_size) + if (skb_is_gso(skb)) len = size; else { diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 12e0bf19f24a..84f43a3c9098 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -17,7 +17,6 @@ * Mike McLagan : Routing by source */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/mm.h> diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 95278b22b669..8a8b5cf2f7fe 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -13,7 +13,6 @@ * - Compression stats. * - Adaptive compression. */ -#include <linux/config.h> #include <linux/module.h> #include <asm/scatterlist.h> #include <asm/semaphore.h> @@ -45,7 +44,6 @@ static LIST_HEAD(ipcomp_tfms_list); static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) { int err, plen, dlen; - struct iphdr *iph; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; struct crypto_tfm *tfm; @@ -72,10 +70,9 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) if (err) goto out; - skb_put(skb, dlen - plen); + skb->truesize += dlen - plen; + __skb_put(skb, dlen - plen); memcpy(skb->data, scratch, dlen); - iph = skb->nh.iph; - iph->tot_len = htons(dlen + iph->ihl * 4); out: put_cpu(); return err; @@ -83,34 +80,21 @@ out: static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) { - u8 nexthdr; - int err = 0; + int err = -ENOMEM; struct iphdr *iph; - union { - struct iphdr iph; - char buf[60]; - } tmp_iph; - + struct ip_comp_hdr *ipch; - if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && - skb_linearize(skb, GFP_ATOMIC) != 0) { - err = -ENOMEM; + if (skb_linearize_cow(skb)) goto out; - } skb->ip_summed = CHECKSUM_NONE; /* Remove ipcomp header and decompress original payload */ iph = skb->nh.iph; - memcpy(&tmp_iph, iph, iph->ihl * 4); - nexthdr = *(u8 *)skb->data; - skb_pull(skb, sizeof(struct ip_comp_hdr)); - skb->nh.raw += sizeof(struct ip_comp_hdr); - memcpy(skb->nh.raw, &tmp_iph, tmp_iph.iph.ihl * 4); - iph = skb->nh.iph; - iph->tot_len = htons(ntohs(iph->tot_len) - sizeof(struct ip_comp_hdr)); - iph->protocol = nexthdr; - skb->h.raw = skb->data; + ipch = (void *)skb->data; + iph->protocol = ipch->nexthdr; + skb->h.raw = skb->nh.raw + sizeof(*ipch); + __skb_pull(skb, sizeof(*ipch)); err = ipcomp_decompress(x, skb); out: @@ -171,10 +155,8 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) goto out_ok; } - if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && - skb_linearize(skb, GFP_ATOMIC) != 0) { + if (skb_linearize_cow(skb)) goto out_ok; - } err = ipcomp_compress(x, skb); iph = skb->nh.iph; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index ea398ee43f28..3291d5192aad 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -94,7 +94,6 @@ #include <linux/capability.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/sched.h> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 717ab7d6d7b6..ba33f8621c67 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -28,7 +28,6 @@ * */ -#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index c453e1e57f4b..4c1940381ba0 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -13,7 +13,6 @@ * Changes: * */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/slab.h> diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c index db67373f9b34..252e837b17a5 100644 --- a/net/ipv4/multipath_drr.c +++ b/net/ipv4/multipath_drr.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c index 5249dbe7c559..b8c289f247cb 100644 --- a/net/ipv4/multipath_random.c +++ b/net/ipv4/multipath_random.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c index b6cd2870478f..bba5abe5542d 100644 --- a/net/ipv4/multipath_rr.c +++ b/net/ipv4/multipath_rr.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c index 342d0b9098f5..d25ec4ae09e5 100644 --- a/net/ipv4/multipath_wrandom.c +++ b/net/ipv4/multipath_wrandom.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <asm/system.h> #include <asm/uaccess.h> #include <linux/types.h> diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d4072533da21..ef0b5aac5838 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -55,6 +55,18 @@ config IP_NF_CONNTRACK_MARK of packets, but this mark value is kept in the conntrack session instead of the individual packets. +config IP_NF_CONNTRACK_SECMARK + bool 'Connection tracking security mark support' + depends on IP_NF_CONNTRACK && NETWORK_SECMARK + help + This option enables security markings to be applied to + connections. Typically they are copied to connections from + packets using the CONNSECMARK target and copied back from + connections to packets with the same target, with the packets + being originally labeled via SECMARK. + + If unsure, say 'N'. + config IP_NF_CONNTRACK_EVENTS bool "Connection tracking events (EXPERIMENTAL)" depends on EXPERIMENTAL && IP_NF_CONNTRACK @@ -142,6 +154,8 @@ config IP_NF_TFTP config IP_NF_AMANDA tristate "Amanda backup protocol support" depends on IP_NF_CONNTRACK + select TEXTSEARCH + select TEXTSEARCH_KMP help If you are running the Amanda backup package <http://www.amanda.org/> on this machine or machines that will be MASQUERADED through this @@ -181,14 +195,26 @@ config IP_NF_H323 With this module you can support H.323 on a connection tracking/NAT firewall. - This module supports RAS, Fast-start, H.245 tunnelling, RTP/RTCP - and T.120 based data and applications including audio, video, FAX, - chat, whiteboard, file transfer, etc. For more information, please - see http://nath323.sourceforge.net/. + This module supports RAS, Fast Start, H.245 Tunnelling, Call + Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat, + whiteboard, file transfer, etc. For more information, please + visit http://nath323.sourceforge.net/. If you want to compile it as a module, say 'M' here and read Documentation/modules.txt. If unsure, say 'N'. +config IP_NF_SIP + tristate "SIP protocol support (EXPERIMENTAL)" + depends on IP_NF_CONNTRACK && EXPERIMENTAL + help + SIP is an application-layer control protocol that can establish, + modify, and terminate multimedia sessions (conferences) such as + Internet telephony calls. With the ip_conntrack_sip and + the ip_nat_sip modules you can support the protocol on a connection + tracking/NATing firewall. + + To compile it as a module, choose M here. If unsure, say Y. + config IP_NF_QUEUE tristate "IP Userspace queueing via NETLINK (OBSOLETE)" help @@ -306,7 +332,7 @@ config IP_NF_MATCH_HASHLIMIT help This option adds a new iptables `hashlimit' match. - As opposed to `limit', this match dynamically crates a hash table + As opposed to `limit', this match dynamically creates a hash table of limit buckets, based on your selection of source/destination ip addresses and/or ports. @@ -501,6 +527,12 @@ config IP_NF_NAT_H323 default IP_NF_NAT if IP_NF_H323=y default m if IP_NF_H323=m +config IP_NF_NAT_SIP + tristate + depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n + default IP_NF_NAT if IP_NF_SIP=y + default m if IP_NF_SIP=m + # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 461cb1eb5de7..3ded4a3af59c 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o +obj-$(CONFIG_IP_NF_SIP) += ip_conntrack_sip.o obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o # NAT helpers @@ -40,6 +41,7 @@ obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o +obj-$(CONFIG_IP_NF_NAT_SIP) += ip_nat_sip.o # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d0d19192026d..80c73ca90116 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -9,7 +9,6 @@ * */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netdevice.h> @@ -1120,7 +1119,8 @@ int arpt_register_table(struct arpt_table *table, return ret; } - if (xt_register_table(table, &bootstrap, newinfo) != 0) { + ret = xt_register_table(table, &bootstrap, newinfo); + if (ret != 0) { xt_free_table_info(newinfo); return ret; } diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index a604b1ccfdaa..0a7bd7f04061 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -17,33 +17,29 @@ * this value. * */ - -#include <linux/in.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/netfilter.h> -#include <linux/ip.h> #include <linux/moduleparam.h> +#include <linux/textsearch.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> #include <linux/udp.h> -#include <net/checksum.h> -#include <net/udp.h> +#include <linux/netfilter.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> #include <linux/netfilter_ipv4/ip_conntrack_amanda.h> static unsigned int master_timeout = 300; +static char *ts_algo = "kmp"; MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); MODULE_DESCRIPTION("Amanda connection tracking module"); MODULE_LICENSE("GPL"); module_param(master_timeout, uint, 0600); MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); - -static const char *conns[] = { "DATA ", "MESG ", "INDEX " }; - -/* This is slow, but it's simple. --RR */ -static char *amanda_buffer; -static DEFINE_SPINLOCK(amanda_buffer_lock); +module_param(ts_algo, charp, 0400); +MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, @@ -52,12 +48,48 @@ unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb, struct ip_conntrack_expect *exp); EXPORT_SYMBOL_GPL(ip_nat_amanda_hook); +enum amanda_strings { + SEARCH_CONNECT, + SEARCH_NEWLINE, + SEARCH_DATA, + SEARCH_MESG, + SEARCH_INDEX, +}; + +static struct { + char *string; + size_t len; + struct ts_config *ts; +} search[] = { + [SEARCH_CONNECT] = { + .string = "CONNECT ", + .len = 8, + }, + [SEARCH_NEWLINE] = { + .string = "\n", + .len = 1, + }, + [SEARCH_DATA] = { + .string = "DATA ", + .len = 5, + }, + [SEARCH_MESG] = { + .string = "MESG ", + .len = 5, + }, + [SEARCH_INDEX] = { + .string = "INDEX ", + .len = 6, + }, +}; + static int help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { + struct ts_state ts; struct ip_conntrack_expect *exp; - char *data, *data_limit, *tmp; - unsigned int dataoff, i; + unsigned int dataoff, start, stop, off, i; + char pbuf[sizeof("65535")], *tmp; u_int16_t port, len; int ret = NF_ACCEPT; @@ -77,29 +109,34 @@ static int help(struct sk_buff **pskb, return NF_ACCEPT; } - spin_lock_bh(&amanda_buffer_lock); - skb_copy_bits(*pskb, dataoff, amanda_buffer, (*pskb)->len - dataoff); - data = amanda_buffer; - data_limit = amanda_buffer + (*pskb)->len - dataoff; - *data_limit = '\0'; - - /* Search for the CONNECT string */ - data = strstr(data, "CONNECT "); - if (!data) + memset(&ts, 0, sizeof(ts)); + start = skb_find_text(*pskb, dataoff, (*pskb)->len, + search[SEARCH_CONNECT].ts, &ts); + if (start == UINT_MAX) goto out; - data += strlen("CONNECT "); + start += dataoff + search[SEARCH_CONNECT].len; - /* Only search first line. */ - if ((tmp = strchr(data, '\n'))) - *tmp = '\0'; + memset(&ts, 0, sizeof(ts)); + stop = skb_find_text(*pskb, start, (*pskb)->len, + search[SEARCH_NEWLINE].ts, &ts); + if (stop == UINT_MAX) + goto out; + stop += start; - for (i = 0; i < ARRAY_SIZE(conns); i++) { - char *match = strstr(data, conns[i]); - if (!match) + for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) { + memset(&ts, 0, sizeof(ts)); + off = skb_find_text(*pskb, start, stop, search[i].ts, &ts); + if (off == UINT_MAX) continue; - tmp = data = match + strlen(conns[i]); - port = simple_strtoul(data, &data, 10); - len = data - tmp; + off += start + search[i].len; + + len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off); + if (skb_copy_bits(*pskb, off, pbuf, len)) + break; + pbuf[len] = '\0'; + + port = simple_strtoul(pbuf, &tmp, 10); + len = tmp - pbuf; if (port == 0 || len > 5) break; @@ -125,8 +162,7 @@ static int help(struct sk_buff **pskb, exp->mask.dst.u.tcp.port = 0xFFFF; if (ip_nat_amanda_hook) - ret = ip_nat_amanda_hook(pskb, ctinfo, - tmp - amanda_buffer, + ret = ip_nat_amanda_hook(pskb, ctinfo, off - dataoff, len, exp); else if (ip_conntrack_expect_related(exp) != 0) ret = NF_DROP; @@ -134,12 +170,11 @@ static int help(struct sk_buff **pskb, } out: - spin_unlock_bh(&amanda_buffer_lock); return ret; } static struct ip_conntrack_helper amanda_helper = { - .max_expected = ARRAY_SIZE(conns), + .max_expected = 3, .timeout = 180, .me = THIS_MODULE, .help = help, @@ -155,26 +190,36 @@ static struct ip_conntrack_helper amanda_helper = { static void __exit ip_conntrack_amanda_fini(void) { + int i; + ip_conntrack_helper_unregister(&amanda_helper); - kfree(amanda_buffer); + for (i = 0; i < ARRAY_SIZE(search); i++) + textsearch_destroy(search[i].ts); } static int __init ip_conntrack_amanda_init(void) { - int ret; - - amanda_buffer = kmalloc(65536, GFP_KERNEL); - if (!amanda_buffer) - return -ENOMEM; - - ret = ip_conntrack_helper_register(&amanda_helper); - if (ret < 0) { - kfree(amanda_buffer); - return ret; + int ret, i; + + ret = -ENOMEM; + for (i = 0; i < ARRAY_SIZE(search); i++) { + search[i].ts = textsearch_prepare(ts_algo, search[i].string, + search[i].len, + GFP_KERNEL, TS_AUTOLOAD); + if (search[i].ts == NULL) + goto err; } + ret = ip_conntrack_helper_register(&amanda_helper); + if (ret < 0) + goto err; return 0; - +err: + for (; i >= 0; i--) { + if (search[i].ts) + textsearch_destroy(search[i].ts); + } + return ret; } module_init(ip_conntrack_amanda_init); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index a297da7bbef5..aa459177c3f8 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -17,7 +17,6 @@ * - export ip_conntrack[_expect]_{find_get,put} functions * */ -#include <linux/config.h> #include <linux/types.h> #include <linux/icmp.h> #include <linux/ip.h> @@ -724,6 +723,9 @@ init_conntrack(struct ip_conntrack_tuple *tuple, /* this is ugly, but there is no other place where to put it */ conntrack->nat.masq_index = exp->master->nat.masq_index; #endif +#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK + conntrack->secmark = exp->master->secmark; +#endif nf_conntrack_get(&conntrack->master->ct_general); CONNTRACK_STAT_INC(expect_new); } else { @@ -1130,6 +1132,12 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, write_lock_bh(&ip_conntrack_lock); + /* Only update if this is not a fixed timeout */ + if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { + write_unlock_bh(&ip_conntrack_lock); + return; + } + /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 3e542bf28a9d..1d18c863f064 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -8,7 +8,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/ip.h> @@ -56,37 +55,48 @@ static int try_eprt(const char *, size_t, u_int32_t [], char); static int try_epsv_response(const char *, size_t, u_int32_t [], char); static const struct ftp_search { - enum ip_conntrack_dir dir; const char *pattern; size_t plen; char skip; char term; enum ip_ct_ftp_type ftptype; int (*getnum)(const char *, size_t, u_int32_t[], char); -} search[] = { - { - IP_CT_DIR_ORIGINAL, - "PORT", sizeof("PORT") - 1, ' ', '\r', - IP_CT_FTP_PORT, - try_rfc959, +} search[IP_CT_DIR_MAX][2] = { + [IP_CT_DIR_ORIGINAL] = { + { + .pattern = "PORT", + .plen = sizeof("PORT") - 1, + .skip = ' ', + .term = '\r', + .ftptype = IP_CT_FTP_PORT, + .getnum = try_rfc959, + }, + { + .pattern = "EPRT", + .plen = sizeof("EPRT") - 1, + .skip = ' ', + .term = '\r', + .ftptype = IP_CT_FTP_EPRT, + .getnum = try_eprt, + }, }, - { - IP_CT_DIR_REPLY, - "227 ", sizeof("227 ") - 1, '(', ')', - IP_CT_FTP_PASV, - try_rfc959, - }, - { - IP_CT_DIR_ORIGINAL, - "EPRT", sizeof("EPRT") - 1, ' ', '\r', - IP_CT_FTP_EPRT, - try_eprt, - }, - { - IP_CT_DIR_REPLY, - "229 ", sizeof("229 ") - 1, '(', ')', - IP_CT_FTP_EPSV, - try_epsv_response, + [IP_CT_DIR_REPLY] = { + { + .pattern = "227 ", + .plen = sizeof("227 ") - 1, + .skip = '(', + .term = ')', + .ftptype = IP_CT_FTP_PASV, + .getnum = try_rfc959, + }, + { + .pattern = "229 ", + .plen = sizeof("229 ") - 1, + .skip = '(', + .term = ')', + .ftptype = IP_CT_FTP_EPSV, + .getnum = try_epsv_response, + }, }, }; @@ -346,17 +356,15 @@ static int help(struct sk_buff **pskb, array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF; array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF; - for (i = 0; i < ARRAY_SIZE(search); i++) { - if (search[i].dir != dir) continue; - + for (i = 0; i < ARRAY_SIZE(search[dir]); i++) { found = find_pattern(fb_ptr, (*pskb)->len - dataoff, - search[i].pattern, - search[i].plen, - search[i].skip, - search[i].term, + search[dir][i].pattern, + search[dir][i].plen, + search[dir][i].skip, + search[dir][i].term, &matchoff, &matchlen, array, - search[i].getnum); + search[dir][i].getnum); if (found) break; } if (found == -1) { @@ -366,7 +374,7 @@ static int help(struct sk_buff **pskb, this case. */ if (net_ratelimit()) printk("conntrack_ftp: partial %s %u+%u\n", - search[i].pattern, + search[dir][i].pattern, ntohl(th->seq), datalen); ret = NF_DROP; goto out; @@ -426,7 +434,7 @@ static int help(struct sk_buff **pskb, /* Now, NAT might want to mangle the packet, and register the * (possibly changed) expectation itself. */ if (ip_nat_ftp_hook) - ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype, + ret = ip_nat_ftp_hook(pskb, ctinfo, search[dir][i].ftptype, matchoff, matchlen, exp, &seq); else { /* Can't expect this? Best to drop packet now. */ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c index 518f581d39ec..af35235672d5 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c @@ -11,7 +11,6 @@ * For more information, please see http://nath323.sourceforge.net/ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/ip.h> @@ -22,6 +21,8 @@ #include <linux/netfilter_ipv4/ip_conntrack_tuple.h> #include <linux/netfilter_ipv4/ip_conntrack_h323.h> #include <linux/moduleparam.h> +#include <linux/ctype.h> +#include <linux/inet.h> #if 0 #define DEBUGP printk @@ -38,6 +39,12 @@ static int gkrouted_only = 1; module_param(gkrouted_only, int, 0600); MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper"); +static int callforward_filter = 1; +module_param(callforward_filter, bool, 0600); +MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " + "if both endpoints are on different sides " + "(determined by routing information)"); + /* Hooks for NAT */ int (*set_h245_addr_hook) (struct sk_buff ** pskb, unsigned char **data, int dataoff, @@ -77,6 +84,12 @@ int (*nat_h245_hook) (struct sk_buff ** pskb, unsigned char **data, int dataoff, TransportAddress * addr, u_int16_t port, struct ip_conntrack_expect * exp); +int (*nat_callforwarding_hook) (struct sk_buff ** pskb, + struct ip_conntrack * ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress * addr, u_int16_t port, + struct ip_conntrack_expect * exp); int (*nat_q931_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct, enum ip_conntrack_info ctinfo, @@ -683,6 +696,92 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct, return ret; } +/* Forwarding declaration */ +void ip_conntrack_q931_expect(struct ip_conntrack *new, + struct ip_conntrack_expect *this); + +/****************************************************************************/ +static int expect_callforwarding(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress * addr) +{ + int dir = CTINFO2DIR(ctinfo); + int ret = 0; + u_int32_t ip; + u_int16_t port; + struct ip_conntrack_expect *exp = NULL; + + /* Read alternativeAddress */ + if (!get_h225_addr(*data, addr, &ip, &port) || port == 0) + return 0; + + /* If the calling party is on the same side of the forward-to party, + * we don't need to track the second call */ + if (callforward_filter) { + struct rtable *rt1, *rt2; + struct flowi fl1 = { + .fl4_dst = ip, + }; + struct flowi fl2 = { + .fl4_dst = ct->tuplehash[!dir].tuple.src.ip, + }; + + if (ip_route_output_key(&rt1, &fl1) == 0) { + if (ip_route_output_key(&rt2, &fl2) == 0) { + if (rt1->rt_gateway == rt2->rt_gateway && + rt1->u.dst.dev == rt2->u.dst.dev) + ret = 1; + dst_release(&rt2->u.dst); + } + dst_release(&rt1->u.dst); + } + if (ret) { + DEBUGP("ip_ct_q931: Call Forwarding not tracked\n"); + return 0; + } + } + + /* Create expect for the second call leg */ + if ((exp = ip_conntrack_expect_alloc(ct)) == NULL) + return -1; + exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip; + exp->tuple.src.u.tcp.port = 0; + exp->tuple.dst.ip = ip; + exp->tuple.dst.u.tcp.port = htons(port); + exp->tuple.dst.protonum = IPPROTO_TCP; + exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.u.tcp.port = 0; + exp->mask.dst.ip = 0xFFFFFFFF; + exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.protonum = 0xFF; + exp->flags = 0; + + if (ct->tuplehash[dir].tuple.src.ip != + ct->tuplehash[!dir].tuple.dst.ip && nat_callforwarding_hook) { + /* Need NAT */ + ret = nat_callforwarding_hook(pskb, ct, ctinfo, data, dataoff, + addr, port, exp); + } else { /* Conntrack only */ + exp->expectfn = ip_conntrack_q931_expect; + + if (ip_conntrack_expect_related(exp) == 0) { + DEBUGP("ip_ct_q931: expect Call Forwarding " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.ip), + ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.ip), + ntohs(exp->tuple.dst.u.tcp.port)); + } else + ret = -1; + } + + ip_conntrack_expect_put(exp); + + return ret; +} + /****************************************************************************/ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, @@ -878,6 +977,15 @@ static int process_facility(struct sk_buff **pskb, struct ip_conntrack *ct, DEBUGP("ip_ct_q931: Facility\n"); + if (facility->reason.choice == eFacilityReason_callForwarded) { + if (facility->options & eFacility_UUIE_alternativeAddress) + return expect_callforwarding(pskb, ct, ctinfo, data, + dataoff, + &facility-> + alternativeAddress); + return 0; + } + if (facility->options & eFacility_UUIE_h245Address) { ret = expect_h245(pskb, ct, ctinfo, data, dataoff, &facility->h245Address); @@ -1677,7 +1785,6 @@ static int __init init(void) fini(); return ret; } - DEBUGP("ip_ct_h323: init success\n"); return 0; } @@ -1696,6 +1803,7 @@ EXPORT_SYMBOL_GPL(set_ras_addr_hook); EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook); EXPORT_SYMBOL_GPL(nat_t120_hook); EXPORT_SYMBOL_GPL(nat_h245_hook); +EXPORT_SYMBOL_GPL(nat_callforwarding_hook); EXPORT_SYMBOL_GPL(nat_q931_hook); MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323_types.c b/net/ipv4/netfilter/ip_conntrack_helper_h323_types.c index 022c47b9f6c9..4b359618bedd 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_h323_types.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_h323_types.c @@ -1,4 +1,4 @@ -/* Generated by Jing Min Zhao's ASN.1 parser, Mar 15 2006 +/* Generated by Jing Min Zhao's ASN.1 parser, Apr 20 2006 * * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> * @@ -1069,8 +1069,8 @@ static field_t _Facility_UUIE_fastStart[] = { /* SEQUENCE OF */ static field_t _Facility_UUIE[] = { /* SEQUENCE */ {FNAME("protocolIdentifier") OID, BYTE, 0, 0, SKIP, 0, NULL}, - {FNAME("alternativeAddress") CHOICE, 3, 7, 7, SKIP | EXT | OPT, 0, - _TransportAddress}, + {FNAME("alternativeAddress") CHOICE, 3, 7, 7, DECODE | EXT | OPT, + offsetof(Facility_UUIE, alternativeAddress), _TransportAddress}, {FNAME("alternativeAliasAddress") SEQOF, SEMI, 0, 0, SKIP | OPT, 0, _Facility_UUIE_alternativeAliasAddress}, {FNAME("conferenceID") OCTSTR, FIXD, 16, 0, SKIP | OPT, 0, NULL}, diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 8ccfe17bb253..b020a33e65e9 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -46,7 +46,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/ip.h> diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index a2ac5ce544b2..44889075f3b2 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -22,7 +22,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/ip.h> diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 01bd7cab9367..33891bb1fde4 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -399,38 +399,54 @@ nfattr_failure: static int ctnetlink_done(struct netlink_callback *cb) { DEBUGP("entered %s\n", __FUNCTION__); + if (cb->args[1]) + ip_conntrack_put((struct ip_conntrack *)cb->args[1]); return 0; } static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { - struct ip_conntrack *ct = NULL; + struct ip_conntrack *ct, *last; struct ip_conntrack_tuple_hash *h; struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[1]; DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, cb->args[0], *id); read_lock_bh(&ip_conntrack_lock); - for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { + for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) { +restart: + last = (struct ip_conntrack *)cb->args[1]; list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { h = (struct ip_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = tuplehash_to_ctrack(h); - if (ct->id <= *id) - continue; + if (last != NULL) { + if (ct == last) { + ip_conntrack_put(last); + cb->args[1] = 0; + last = NULL; + } else + continue; + } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, IPCTNL_MSG_CT_NEW, - 1, ct) < 0) + 1, ct) < 0) { + nf_conntrack_get(&ct->ct_general); + cb->args[1] = (unsigned long)ct; goto out; - *id = ct->id; + } + } + if (last != NULL) { + ip_conntrack_put(last); + cb->args[1] = 0; + goto restart; } } -out: +out: read_unlock_bh(&ip_conntrack_lock); DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); @@ -629,7 +645,7 @@ static const size_t cta_min_nat[CTA_NAT_MAX] = { }; static inline int -ctnetlink_parse_nat(struct nfattr *cda[], +ctnetlink_parse_nat(struct nfattr *nat, const struct ip_conntrack *ct, struct ip_nat_range *range) { struct nfattr *tb[CTA_NAT_MAX]; @@ -639,7 +655,7 @@ ctnetlink_parse_nat(struct nfattr *cda[], memset(range, 0, sizeof(*range)); - nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]); + nfattr_parse_nested(tb, CTA_NAT_MAX, nat); if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat)) return -EINVAL; @@ -854,39 +870,30 @@ ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) /* ASSURED bit can only be set */ return -EINVAL; - if (cda[CTA_NAT-1]) { + if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { #ifndef CONFIG_IP_NF_NAT_NEEDED return -EINVAL; #else - unsigned int hooknum; struct ip_nat_range range; - if (ctnetlink_parse_nat(cda, ct, &range) < 0) - return -EINVAL; - - DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", - NIPQUAD(range.min_ip), NIPQUAD(range.max_ip), - htons(range.min.all), htons(range.max.all)); - - /* This is tricky but it works. ip_nat_setup_info needs the - * hook number as parameter, so let's do the correct - * conversion and run away */ - if (status & IPS_SRC_NAT_DONE) - hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */ - else if (status & IPS_DST_NAT_DONE) - hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */ - else - return -EINVAL; /* Missing NAT flags */ - - DEBUGP("NAT status: %lu\n", - status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); - - if (ip_nat_initialized(ct, HOOK2MANIP(hooknum))) - return -EEXIST; - ip_nat_setup_info(ct, &range, hooknum); - - DEBUGP("NAT status after setup_info: %lu\n", - ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); + if (cda[CTA_NAT_DST-1]) { + if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct, + &range) < 0) + return -EINVAL; + if (ip_nat_initialized(ct, + HOOK2MANIP(NF_IP_PRE_ROUTING))) + return -EEXIST; + ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); + } + if (cda[CTA_NAT_SRC-1]) { + if (ctnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct, + &range) < 0) + return -EINVAL; + if (ip_nat_initialized(ct, + HOOK2MANIP(NF_IP_POST_ROUTING))) + return -EEXIST; + ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); + } #endif } @@ -1106,7 +1113,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, /* implicit 'else' */ /* we only allow nat config for new conntracks */ - if (cda[CTA_NAT-1]) { + if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { err = -EINVAL; goto out_unlock; } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 56794797d55b..4ee016c427b4 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -23,7 +23,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/timer.h> @@ -77,10 +76,10 @@ static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, } /* look up the source key for a given tuple */ -static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t) +static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t) { struct ip_ct_gre_keymap *km; - u_int32_t key = 0; + __be16 key = 0; read_lock_bh(&ip_ct_gre_lock); km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, @@ -190,7 +189,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb, struct ip_conntrack_tuple *tuple) { struct gre_hdr_pptp _pgrehdr, *pgrehdr; - u_int32_t srckey; + __be16 srckey; struct gre_hdr _grehdr, *grehdr; /* first only delinearize old RFC1701 GRE header */ diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index d8b14a9010a6..23f1c504586d 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -224,7 +224,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, } /* See ip_conntrack_proto_tcp.c */ - if (hooknum == NF_IP_PRE_ROUTING && + if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && nf_ip_checksum(skb, hooknum, skb->nh.iph->ihl * 4, 0)) { if (LOG_INVALID(IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 0416073c5600..2d3612cd5f18 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -254,7 +254,7 @@ static int do_basic_checks(struct ip_conntrack *conntrack, } DEBUGP("Basic checks passed\n"); - return 0; + return count == 0; } static int new_state(enum ip_conntrack_dir dir, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 062b252b58ad..fb920e76ec10 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -19,7 +19,6 @@ * version 2.2 */ -#include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/timer.h> @@ -870,7 +869,7 @@ static int tcp_error(struct sk_buff *skb, * and moreover root might send raw packets. */ /* FIXME: Source route IP option packets --RR */ - if (hooknum == NF_IP_PRE_ROUTING && + if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 70899868783b..9b2c16b4d2ff 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -120,7 +120,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, * because the semantic of CHECKSUM_HW is different there * and moreover root might send raw packets. * FIXME: Source route IP option packets --RR */ - if (hooknum == NF_IP_PRE_ROUTING && + if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) { if (LOG_INVALID(IPPROTO_UDP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c new file mode 100644 index 000000000000..fc87ce0da40d --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -0,0 +1,471 @@ +/* SIP extension for IP connection tracking. + * + * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> + * based on RR's ip_conntrack_ftp.c and other modules. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/udp.h> + +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_sip.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); +MODULE_DESCRIPTION("SIP connection tracking helper"); + +#define MAX_PORTS 8 +static unsigned short ports[MAX_PORTS]; +static int ports_c; +module_param_array(ports, ushort, &ports_c, 0400); +MODULE_PARM_DESC(ports, "port numbers of sip servers"); + +static unsigned int sip_timeout = SIP_TIMEOUT; +module_param(sip_timeout, uint, 0600); +MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session"); + +unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack *ct, + const char **dptr); +EXPORT_SYMBOL_GPL(ip_nat_sip_hook); + +unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack_expect *exp, + const char *dptr); +EXPORT_SYMBOL_GPL(ip_nat_sdp_hook); + +int ct_sip_get_info(const char *dptr, size_t dlen, + unsigned int *matchoff, + unsigned int *matchlen, + struct sip_header_nfo *hnfo); +EXPORT_SYMBOL_GPL(ct_sip_get_info); + + +static int digits_len(const char *dptr, const char *limit, int *shift); +static int epaddr_len(const char *dptr, const char *limit, int *shift); +static int skp_digits_len(const char *dptr, const char *limit, int *shift); +static int skp_epaddr_len(const char *dptr, const char *limit, int *shift); + +struct sip_header_nfo ct_sip_hdrs[] = { + { /* Via header */ + .lname = "Via:", + .lnlen = sizeof("Via:") - 1, + .sname = "\r\nv:", + .snlen = sizeof("\r\nv:") - 1, /* rfc3261 "\r\n" */ + .ln_str = "UDP ", + .ln_strlen = sizeof("UDP ") - 1, + .match_len = epaddr_len, + }, + { /* Contact header */ + .lname = "Contact:", + .lnlen = sizeof("Contact:") - 1, + .sname = "\r\nm:", + .snlen = sizeof("\r\nm:") - 1, + .ln_str = "sip:", + .ln_strlen = sizeof("sip:") - 1, + .match_len = skp_epaddr_len + }, + { /* Content length header */ + .lname = "Content-Length:", + .lnlen = sizeof("Content-Length:") - 1, + .sname = "\r\nl:", + .snlen = sizeof("\r\nl:") - 1, + .ln_str = ":", + .ln_strlen = sizeof(":") - 1, + .match_len = skp_digits_len + }, + { /* SDP media info */ + .lname = "\nm=", + .lnlen = sizeof("\nm=") - 1, + .sname = "\rm=", + .snlen = sizeof("\rm=") - 1, + .ln_str = "audio ", + .ln_strlen = sizeof("audio ") - 1, + .match_len = digits_len + }, + { /* SDP owner address*/ + .lname = "\no=", + .lnlen = sizeof("\no=") - 1, + .sname = "\ro=", + .snlen = sizeof("\ro=") - 1, + .ln_str = "IN IP4 ", + .ln_strlen = sizeof("IN IP4 ") - 1, + .match_len = epaddr_len + }, + { /* SDP connection info */ + .lname = "\nc=", + .lnlen = sizeof("\nc=") - 1, + .sname = "\rc=", + .snlen = sizeof("\rc=") - 1, + .ln_str = "IN IP4 ", + .ln_strlen = sizeof("IN IP4 ") - 1, + .match_len = epaddr_len + }, + { /* Requests headers */ + .lname = "sip:", + .lnlen = sizeof("sip:") - 1, + .sname = "sip:", + .snlen = sizeof("sip:") - 1, /* yes, i know.. ;) */ + .ln_str = "@", + .ln_strlen = sizeof("@") - 1, + .match_len = epaddr_len + }, + { /* SDP version header */ + .lname = "\nv=", + .lnlen = sizeof("\nv=") - 1, + .sname = "\rv=", + .snlen = sizeof("\rv=") - 1, + .ln_str = "=", + .ln_strlen = sizeof("=") - 1, + .match_len = digits_len + } +}; +EXPORT_SYMBOL_GPL(ct_sip_hdrs); + +/* get line lenght until first CR or LF seen. */ +int ct_sip_lnlen(const char *line, const char *limit) +{ + const char *k = line; + + while ((line <= limit) && (*line == '\r' || *line == '\n')) + line++; + + while (line <= limit) { + if (*line == '\r' || *line == '\n') + break; + line++; + } + return line - k; +} +EXPORT_SYMBOL_GPL(ct_sip_lnlen); + +/* Linear string search, case sensitive. */ +const char *ct_sip_search(const char *needle, const char *haystack, + size_t needle_len, size_t haystack_len) +{ + const char *limit = haystack + (haystack_len - needle_len); + + while (haystack <= limit) { + if (memcmp(haystack, needle, needle_len) == 0) + return haystack; + haystack++; + } + return NULL; +} +EXPORT_SYMBOL_GPL(ct_sip_search); + +static int digits_len(const char *dptr, const char *limit, int *shift) +{ + int len = 0; + while (dptr <= limit && isdigit(*dptr)) { + dptr++; + len++; + } + return len; +} + +/* get digits lenght, skiping blank spaces. */ +static int skp_digits_len(const char *dptr, const char *limit, int *shift) +{ + for (; dptr <= limit && *dptr == ' '; dptr++) + (*shift)++; + + return digits_len(dptr, limit, shift); +} + +/* Simple ipaddr parser.. */ +static int parse_ipaddr(const char *cp, const char **endp, + u_int32_t *ipaddr, const char *limit) +{ + unsigned long int val; + int i, digit = 0; + + for (i = 0, *ipaddr = 0; cp <= limit && i < 4; i++) { + digit = 0; + if (!isdigit(*cp)) + break; + + val = simple_strtoul(cp, (char **)&cp, 10); + if (val > 0xFF) + return -1; + + ((u_int8_t *)ipaddr)[i] = val; + digit = 1; + + if (*cp != '.') + break; + cp++; + } + if (!digit) + return -1; + + if (endp) + *endp = cp; + + return 0; +} + +/* skip ip address. returns it lenght. */ +static int epaddr_len(const char *dptr, const char *limit, int *shift) +{ + const char *aux = dptr; + u_int32_t ip; + + if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) { + DEBUGP("ip: %s parse failed.!\n", dptr); + return 0; + } + + /* Port number */ + if (*dptr == ':') { + dptr++; + dptr += digits_len(dptr, limit, shift); + } + return dptr - aux; +} + +/* get address length, skiping user info. */ +static int skp_epaddr_len(const char *dptr, const char *limit, int *shift) +{ + int s = *shift; + + for (; dptr <= limit && *dptr != '@'; dptr++) + (*shift)++; + + if (*dptr == '@') { + dptr++; + (*shift)++; + } else + *shift = s; + + return epaddr_len(dptr, limit, shift); +} + +/* Returns 0 if not found, -1 error parsing. */ +int ct_sip_get_info(const char *dptr, size_t dlen, + unsigned int *matchoff, + unsigned int *matchlen, + struct sip_header_nfo *hnfo) +{ + const char *limit, *aux, *k = dptr; + int shift = 0; + + limit = dptr + (dlen - hnfo->lnlen); + + while (dptr <= limit) { + if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) && + (strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) { + dptr++; + continue; + } + aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen, + ct_sip_lnlen(dptr, limit)); + if (!aux) { + DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str, + hnfo->lname); + return -1; + } + aux += hnfo->ln_strlen; + + *matchlen = hnfo->match_len(aux, limit, &shift); + if (!*matchlen) + return -1; + + *matchoff = (aux - k) + shift; + + DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname, + *matchlen); + return 1; + } + DEBUGP("%s header not found.\n", hnfo->lname); + return 0; +} + +static int set_expected_rtp(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + u_int32_t ipaddr, u_int16_t port, + const char *dptr) +{ + struct ip_conntrack_expect *exp; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + int ret; + + exp = ip_conntrack_expect_alloc(ct); + if (exp == NULL) + return NF_DROP; + + exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip; + exp->tuple.src.u.udp.port = 0; + exp->tuple.dst.ip = ipaddr; + exp->tuple.dst.u.udp.port = htons(port); + exp->tuple.dst.protonum = IPPROTO_UDP; + + exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.u.udp.port = 0; + exp->mask.dst.ip = 0xFFFFFFFF; + exp->mask.dst.u.udp.port = 0xFFFF; + exp->mask.dst.protonum = 0xFF; + + exp->expectfn = NULL; + exp->flags = 0; + + if (ip_nat_sdp_hook) + ret = ip_nat_sdp_hook(pskb, ctinfo, exp, dptr); + else { + if (ip_conntrack_expect_related(exp) != 0) + ret = NF_DROP; + else + ret = NF_ACCEPT; + } + ip_conntrack_expect_put(exp); + + return ret; +} + +static int sip_help(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + unsigned int dataoff, datalen; + const char *dptr; + int ret = NF_ACCEPT; + int matchoff, matchlen; + u_int32_t ipaddr; + u_int16_t port; + + /* No Data ? */ + dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + if (dataoff >= (*pskb)->len) { + DEBUGP("skb->len = %u\n", (*pskb)->len); + return NF_ACCEPT; + } + + ip_ct_refresh(ct, *pskb, sip_timeout * HZ); + + if (!skb_is_nonlinear(*pskb)) + dptr = (*pskb)->data + dataoff; + else { + DEBUGP("Copy of skbuff not supported yet.\n"); + goto out; + } + + if (ip_nat_sip_hook) { + if (!ip_nat_sip_hook(pskb, ctinfo, ct, &dptr)) { + ret = NF_DROP; + goto out; + } + } + + /* After this point NAT, could have mangled skb, so + we need to recalculate payload lenght. */ + datalen = (*pskb)->len - dataoff; + + if (datalen < (sizeof("SIP/2.0 200") - 1)) + goto out; + + /* RTP info only in some SDP pkts */ + if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 && + memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) { + goto out; + } + /* Get ip and port address from SDP packet. */ + if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen, + &ct_sip_hdrs[POS_CONNECTION]) > 0) { + + /* We'll drop only if there are parse problems. */ + if (parse_ipaddr(dptr + matchoff, NULL, &ipaddr, + dptr + datalen) < 0) { + ret = NF_DROP; + goto out; + } + if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen, + &ct_sip_hdrs[POS_MEDIA]) > 0) { + + port = simple_strtoul(dptr + matchoff, NULL, 10); + if (port < 1024) { + ret = NF_DROP; + goto out; + } + ret = set_expected_rtp(pskb, ct, ctinfo, + ipaddr, port, dptr); + } + } +out: + return ret; +} + +static struct ip_conntrack_helper sip[MAX_PORTS]; +static char sip_names[MAX_PORTS][10]; + +static void fini(void) +{ + int i; + for (i = 0; i < ports_c; i++) { + DEBUGP("unregistering helper for port %d\n", ports[i]); + ip_conntrack_helper_unregister(&sip[i]); + } +} + +static int __init init(void) +{ + int i, ret; + char *tmpname; + + if (ports_c == 0) + ports[ports_c++] = SIP_PORT; + + for (i = 0; i < ports_c; i++) { + /* Create helper structure */ + memset(&sip[i], 0, sizeof(struct ip_conntrack_helper)); + + sip[i].tuple.dst.protonum = IPPROTO_UDP; + sip[i].tuple.src.u.udp.port = htons(ports[i]); + sip[i].mask.src.u.udp.port = 0xFFFF; + sip[i].mask.dst.protonum = 0xFF; + sip[i].max_expected = 1; + sip[i].timeout = 3 * 60; /* 3 minutes */ + sip[i].me = THIS_MODULE; + sip[i].help = sip_help; + + tmpname = &sip_names[i][0]; + if (ports[i] == SIP_PORT) + sprintf(tmpname, "sip"); + else + sprintf(tmpname, "sip-%d", i); + sip[i].name = tmpname; + + DEBUGP("port #%d: %d\n", i, ports[i]); + + ret = ip_conntrack_helper_register(&sip[i]); + if (ret) { + printk("ERROR registering helper for port %d\n", + ports[i]); + fini(); + return ret; + } + } + return 0; +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 929d61f7be91..7bd3c22003a2 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -12,7 +12,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/ip.h> #include <linux/netfilter.h> @@ -189,6 +188,11 @@ static int ct_seq_show(struct seq_file *s, void *v) return -ENOSPC; #endif +#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK + if (seq_printf(s, "secmark=%u ", conntrack->secmark)) + return -ENOSPC; +#endif + if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) return -ENOSPC; @@ -417,7 +421,7 @@ static unsigned int ip_conntrack_help(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = ip_conntrack_get(*pskb, &ctinfo); - if (ct && ct->helper) { + if (ct && ct->helper && ctinfo != IP_CT_RELATED + IP_CT_IS_REPLY) { unsigned int ret; ret = ct->helper->help(pskb, ct, ctinfo); if (ret != NF_ACCEPT) @@ -564,6 +568,8 @@ extern unsigned int ip_ct_generic_timeout; static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; +int ip_conntrack_checksum = 1; + static struct ctl_table_header *ip_ct_sysctl_header; static ctl_table ip_ct_sysctl_table[] = { @@ -592,6 +598,14 @@ static ctl_table ip_ct_sysctl_table[] = { .proc_handler = &proc_dointvec, }, { + .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, + .procname = "ip_conntrack_checksum", + .data = &ip_conntrack_checksum, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, .procname = "ip_conntrack_tcp_timeout_syn_sent", .data = &ip_ct_tcp_timeout_syn_sent, @@ -946,6 +960,7 @@ EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname); EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get); EXPORT_SYMBOL_GPL(ip_conntrack_proto_put); EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find); +EXPORT_SYMBOL_GPL(ip_conntrack_checksum); #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr); diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 5d506e0564d5..cbcaa45370ae 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -15,7 +15,6 @@ * - make ip_nat_resize_packet more generic (TCP and UDP) * - add ip_nat_mangle_udp_packet */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kmod.h> #include <linux/types.h> diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c index d45663d137a7..419b878fb467 100644 --- a/net/ipv4/netfilter/ip_nat_helper_h323.c +++ b/net/ipv4/netfilter/ip_nat_helper_h323.c @@ -487,6 +487,80 @@ static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct, } /****************************************************************************/ +static void ip_nat_callforwarding_expect(struct ip_conntrack *new, + struct ip_conntrack_expect *this) +{ + struct ip_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(new->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip; + + /* hook doesn't matter, but it has to do source manip */ + ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.min = range.max = this->saved_proto; + range.min_ip = range.max_ip = this->saved_ip; + + /* hook doesn't matter, but it has to do destination manip */ + ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING); + + ip_conntrack_q931_expect(new, this); +} + +/****************************************************************************/ +static int nat_callforwarding(struct sk_buff **pskb, struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned char **data, int dataoff, + TransportAddress * addr, u_int16_t port, + struct ip_conntrack_expect *exp) +{ + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port; + + /* Set expectations for NAT */ + exp->saved_ip = exp->tuple.dst.ip; + exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = ip_nat_callforwarding_expect; + exp->dir = !dir; + + /* Try to get same port: if not, try to change it. */ + for (nated_port = port; nated_port != 0; nated_port++) { + exp->tuple.dst.u.tcp.port = htons(nated_port); + if (ip_conntrack_expect_related(exp) == 0) + break; + } + + if (nated_port == 0) { /* No port available */ + if (net_ratelimit()) + printk("ip_nat_q931: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (!set_h225_addr(pskb, data, dataoff, addr, + ct->tuplehash[!dir].tuple.dst.ip, + nated_port) == 0) { + ip_conntrack_unexpect_related(exp); + return -1; + } + + /* Success */ + DEBUGP("ip_nat_q931: expect Call Forwarding " + "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n", + NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port), + NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/****************************************************************************/ static int __init init(void) { BUG_ON(set_h245_addr_hook != NULL); @@ -496,6 +570,7 @@ static int __init init(void) BUG_ON(nat_rtp_rtcp_hook != NULL); BUG_ON(nat_t120_hook != NULL); BUG_ON(nat_h245_hook != NULL); + BUG_ON(nat_callforwarding_hook != NULL); BUG_ON(nat_q931_hook != NULL); set_h245_addr_hook = set_h245_addr; @@ -505,6 +580,7 @@ static int __init init(void) nat_rtp_rtcp_hook = nat_rtp_rtcp; nat_t120_hook = nat_t120; nat_h245_hook = nat_h245; + nat_callforwarding_hook = nat_callforwarding; nat_q931_hook = nat_q931; DEBUGP("ip_nat_h323: init success\n"); @@ -521,6 +597,7 @@ static void __exit fini(void) nat_rtp_rtcp_hook = NULL; nat_t120_hook = NULL; nat_h245_hook = NULL; + nat_callforwarding_hook = NULL; nat_q931_hook = NULL; synchronize_net(); } diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index f3977726ff09..1d149964dc38 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -35,7 +35,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/tcp.h> diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index 96ceabaec402..38acfdf540eb 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -23,7 +23,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/netfilter_ipv4/ip_nat.h> diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c new file mode 100644 index 000000000000..6ffba63adca2 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_sip.c @@ -0,0 +1,249 @@ +/* SIP extension for UDP NAT alteration. + * + * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> + * based on RR's ip_nat_ftp.c and other modules. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/udp.h> + +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_sip.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); +MODULE_DESCRIPTION("SIP NAT helper"); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +extern struct sip_header_nfo ct_sip_hdrs[]; + +static unsigned int mangle_sip_packet(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack *ct, + const char **dptr, size_t dlen, + char *buffer, int bufflen, + struct sip_header_nfo *hnfo) +{ + unsigned int matchlen, matchoff; + + if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, hnfo) <= 0) + return 0; + + if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo, + matchoff, matchlen, buffer, bufflen)) + return 0; + + /* We need to reload this. Thanks Patrick. */ + *dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + return 1; +} + +static unsigned int ip_nat_sip(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack *ct, + const char **dptr) +{ + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; + unsigned int bufflen, dataoff; + u_int32_t ip; + u_int16_t port; + + dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + + ip = ct->tuplehash[!dir].tuple.dst.ip; + port = ct->tuplehash[!dir].tuple.dst.u.udp.port; + bufflen = sprintf(buffer, "%u.%u.%u.%u:%u", NIPQUAD(ip), ntohs(port)); + + /* short packet ? */ + if (((*pskb)->len - dataoff) < (sizeof("SIP/2.0") - 1)) + return 0; + + /* Basic rules: requests and responses. */ + if (memcmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) == 0) { + const char *aux; + + if ((ctinfo) < IP_CT_IS_REPLY) { + mangle_sip_packet(pskb, ctinfo, ct, dptr, + (*pskb)->len - dataoff, + buffer, bufflen, + &ct_sip_hdrs[POS_CONTACT]); + return 1; + } + + if (!mangle_sip_packet(pskb, ctinfo, ct, dptr, + (*pskb)->len - dataoff, + buffer, bufflen, &ct_sip_hdrs[POS_VIA])) + return 0; + + /* This search should ignore case, but later.. */ + aux = ct_sip_search("CSeq:", *dptr, sizeof("CSeq:") - 1, + (*pskb)->len - dataoff); + if (!aux) + return 0; + + if (!ct_sip_search("REGISTER", aux, sizeof("REGISTER"), + ct_sip_lnlen(aux, *dptr + (*pskb)->len - dataoff))) + return 1; + + return mangle_sip_packet(pskb, ctinfo, ct, dptr, + (*pskb)->len - dataoff, + buffer, bufflen, + &ct_sip_hdrs[POS_CONTACT]); + } + if ((ctinfo) < IP_CT_IS_REPLY) { + if (!mangle_sip_packet(pskb, ctinfo, ct, dptr, + (*pskb)->len - dataoff, + buffer, bufflen, &ct_sip_hdrs[POS_VIA])) + return 0; + + /* Mangle Contact if exists only. - watch udp_nat_mangle()! */ + mangle_sip_packet(pskb, ctinfo, ct, dptr, (*pskb)->len - dataoff, + buffer, bufflen, &ct_sip_hdrs[POS_CONTACT]); + return 1; + } + /* This mangle requests headers. */ + return mangle_sip_packet(pskb, ctinfo, ct, dptr, + ct_sip_lnlen(*dptr, + *dptr + (*pskb)->len - dataoff), + buffer, bufflen, &ct_sip_hdrs[POS_REQ_HEADER]); +} + +static int mangle_content_len(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack *ct, + const char *dptr) +{ + unsigned int dataoff, matchoff, matchlen; + char buffer[sizeof("65536")]; + int bufflen; + + dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + + /* Get actual SDP lenght */ + if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff, + &matchlen, &ct_sip_hdrs[POS_SDP_HEADER]) > 0) { + + /* since ct_sip_get_info() give us a pointer passing 'v=' + we need to add 2 bytes in this count. */ + int c_len = (*pskb)->len - dataoff - matchoff + 2; + + /* Now, update SDP lenght */ + if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff, + &matchlen, &ct_sip_hdrs[POS_CONTENT]) > 0) { + + bufflen = sprintf(buffer, "%u", c_len); + + return ip_nat_mangle_udp_packet(pskb, ct, ctinfo, + matchoff, matchlen, + buffer, bufflen); + } + } + return 0; +} + +static unsigned int mangle_sdp(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack *ct, + u_int32_t newip, u_int16_t port, + const char *dptr) +{ + char buffer[sizeof("nnn.nnn.nnn.nnn")]; + unsigned int dataoff, bufflen; + + dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + + /* Mangle owner and contact info. */ + bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); + if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + buffer, bufflen, &ct_sip_hdrs[POS_OWNER])) + return 0; + + if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + buffer, bufflen, &ct_sip_hdrs[POS_CONNECTION])) + return 0; + + /* Mangle media port. */ + bufflen = sprintf(buffer, "%u", port); + if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, + buffer, bufflen, &ct_sip_hdrs[POS_MEDIA])) + return 0; + + return mangle_content_len(pskb, ctinfo, ct, dptr); +} + +/* So, this packet has hit the connection tracking matching code. + Mangle it, and change the expectation to match the new version. */ +static unsigned int ip_nat_sdp(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack_expect *exp, + const char *dptr) +{ + struct ip_conntrack *ct = exp->master; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + u_int32_t newip; + u_int16_t port; + + DEBUGP("ip_nat_sdp():\n"); + + /* Connection will come from reply */ + newip = ct->tuplehash[!dir].tuple.dst.ip; + + exp->tuple.dst.ip = newip; + exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; + exp->dir = !dir; + + /* When you see the packet, we need to NAT it the same as the + this one. */ + exp->expectfn = ip_nat_follow_master; + + /* Try to get same port: if not, try to change it. */ + for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { + exp->tuple.dst.u.udp.port = htons(port); + if (ip_conntrack_expect_related(exp) == 0) + break; + } + + if (port == 0) + return NF_DROP; + + if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) { + ip_conntrack_unexpect_related(exp); + return NF_DROP; + } + return NF_ACCEPT; +} + +static void __exit fini(void) +{ + ip_nat_sip_hook = NULL; + ip_nat_sdp_hook = NULL; + /* Make sure noone calls it, meanwhile. */ + synchronize_net(); +} + +static int __init init(void) +{ + BUG_ON(ip_nat_sip_hook); + BUG_ON(ip_nat_sdp_hook); + ip_nat_sip_hook = ip_nat_sip; + ip_nat_sdp_hook = ip_nat_sdp; + return 0; +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index c33244263b90..0b1b416759cc 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -43,7 +43,6 @@ * 2000-08-06: Convert to new helper API (Harald Welte). * */ -#include <linux/config.h> #include <linux/in.h> #include <linux/module.h> #include <linux/types.h> @@ -1348,4 +1347,4 @@ static void __exit ip_nat_snmp_basic_fini(void) module_init(ip_nat_snmp_basic_init); module_exit(ip_nat_snmp_basic_fini); -module_param(debug, bool, 0600); +module_param(debug, int, 0600); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 67e676783da9..17de077a7901 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -18,7 +18,6 @@ * - now capable of multiple expectations for one master * */ -#include <linux/config.h> #include <linux/types.h> #include <linux/icmp.h> #include <linux/ip.h> diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index b93f0494362f..198ac36db861 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -457,11 +457,19 @@ dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) if (entry->info->indev) if (entry->info->indev->ifindex == ifindex) return 1; - if (entry->info->outdev) if (entry->info->outdev->ifindex == ifindex) return 1; - +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + if (entry->skb->nf_bridge->physindev && + entry->skb->nf_bridge->physindev->ifindex == ifindex) + return 1; + if (entry->skb->nf_bridge->physoutdev && + entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + return 1; + } +#endif return 0; } @@ -507,7 +515,7 @@ ipq_rcv_skb(struct sk_buff *skb) if (type <= IPQM_BASE) return; - if (security_netlink_recv(skb)) + if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); write_lock_bh(&queue_lock); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cee3397ec277..fc5bdd5eb7d3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -14,7 +14,6 @@ * 08 Oct 2005 Harald Welte <lafore@netfilter.org> * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables" */ -#include <linux/config.h> #include <linux/cache.h> #include <linux/capability.h> #include <linux/skbuff.h> @@ -1761,7 +1760,7 @@ translate_compat_table(const char *name, goto free_newinfo; /* And one copy for every other CPU */ - for_each_cpu(i) + for_each_possible_cpu(i) if (newinfo->entries[i] && newinfo->entries[i] != entry1) memcpy(newinfo->entries[i], entry1, newinfo->size); @@ -2113,7 +2112,8 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) return ret; } - if (xt_register_table(table, &bootstrap, newinfo) != 0) { + ret = xt_register_table(table, &bootstrap, newinfo); + if (ret != 0) { xt_free_table_info(newinfo); return ret; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index aad9d28c8d71..cbffeae3f565 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -10,7 +10,6 @@ * */ #include <linux/module.h> -#include <linux/config.h> #include <linux/proc_fs.h> #include <linux/jhash.h> #include <linux/bitops.h> @@ -241,25 +240,17 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) struct iphdr *iph = skb->nh.iph; unsigned long hashval; u_int16_t sport, dport; - struct tcphdr *th; - struct udphdr *uh; - struct icmphdr *ih; + u_int16_t *ports; switch (iph->protocol) { case IPPROTO_TCP: - th = (void *)iph+iph->ihl*4; - sport = ntohs(th->source); - dport = ntohs(th->dest); - break; case IPPROTO_UDP: - uh = (void *)iph+iph->ihl*4; - sport = ntohs(uh->source); - dport = ntohs(uh->dest); - break; + case IPPROTO_SCTP: + case IPPROTO_DCCP: case IPPROTO_ICMP: - ih = (void *)iph+iph->ihl*4; - sport = ntohs(ih->un.echo.id); - dport = (ih->type<<8)|ih->code; + ports = (void *)iph+iph->ihl*4; + sport = ports[0]; + dport = ports[1]; break; default: if (net_ratelimit()) { diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 8b3e7f99b861..ebd94f2abf0d 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -9,7 +9,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/inetdevice.h> #include <linux/ip.h> diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 2fcf1075b027..736c4b5a86a7 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -10,7 +10,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/ip.h> #include <linux/module.h> #include <linux/netdevice.h> diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 0bba3c2bb786..269bc2067cb8 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -12,7 +12,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> @@ -147,6 +146,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) /* This packet will not be the same as the other: clear nf fields */ nf_reset(nskb); nskb->nfmark = 0; + skb_init_secmark(nskb); tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index c84cc03389d8..d7dd7fe7051c 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -47,7 +47,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/spinlock.h> #include <linux/socket.h> #include <linux/skbuff.h> diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 7c6836c4646e..92980ab8ce48 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -28,9 +28,6 @@ #include <linux/jhash.h> #include <linux/slab.h> #include <linux/vmalloc.h> -#include <linux/tcp.h> -#include <linux/udp.h> -#include <linux/sctp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/list.h> @@ -83,6 +80,7 @@ struct ipt_hashlimit_htable { /* used internally */ spinlock_t lock; /* lock for list_head */ u_int32_t rnd; /* random seed for hash */ + int rnd_initialized; struct timer_list timer; /* timer for gc */ atomic_t count; /* number entries in table */ @@ -137,8 +135,10 @@ __dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst) /* initialize hash with random val at the time we allocate * the first hashtable entry */ - if (!ht->rnd) + if (!ht->rnd_initialized) { get_random_bytes(&ht->rnd, 4); + ht->rnd_initialized = 1; + } if (ht->cfg.max && atomic_read(&ht->count) >= ht->cfg.max) { @@ -217,7 +217,7 @@ static int htable_create(struct ipt_hashlimit_info *minfo) atomic_set(&hinfo->count, 0); atomic_set(&hinfo->use, 1); - hinfo->rnd = 0; + hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); hinfo->pde = create_proc_entry(minfo->name, 0, hashlimit_procdir); if (!hinfo->pde) { @@ -381,49 +381,6 @@ static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) dh->rateinfo.credit = dh->rateinfo.credit_cap; } -static inline int get_ports(const struct sk_buff *skb, int offset, - u16 ports[2]) -{ - union { - struct tcphdr th; - struct udphdr uh; - sctp_sctphdr_t sctph; - } hdr_u, *ptr_u; - - /* Must not be a fragment. */ - if (offset) - return 1; - - /* Must be big enough to read ports (both UDP and TCP have - them at the start). */ - ptr_u = skb_header_pointer(skb, skb->nh.iph->ihl*4, 8, &hdr_u); - if (!ptr_u) - return 1; - - switch (skb->nh.iph->protocol) { - case IPPROTO_TCP: - ports[0] = ptr_u->th.source; - ports[1] = ptr_u->th.dest; - break; - case IPPROTO_UDP: - ports[0] = ptr_u->uh.source; - ports[1] = ptr_u->uh.dest; - break; - case IPPROTO_SCTP: - ports[0] = ptr_u->sctph.source; - ports[1] = ptr_u->sctph.dest; - break; - default: - /* all other protocols don't supprot per-port hash - * buckets */ - ports[0] = ports[1] = 0; - break; - } - - return 0; -} - - static int hashlimit_match(const struct sk_buff *skb, const struct net_device *in, @@ -449,8 +406,22 @@ hashlimit_match(const struct sk_buff *skb, dst.src_ip = skb->nh.iph->saddr; if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT ||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) { - u_int16_t ports[2]; - if (get_ports(skb, offset, ports)) { + u_int16_t _ports[2], *ports; + + switch (skb->nh.iph->protocol) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_SCTP: + case IPPROTO_DCCP: + ports = skb_header_pointer(skb, skb->nh.iph->ihl*4, + sizeof(_ports), &_ports); + break; + default: + _ports[0] = _ports[1] = 0; + ports = _ports; + break; + } + if (!ports) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ *hotdrop = 1; @@ -561,7 +532,7 @@ static void hashlimit_destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) { - struct ipt_hashlimit_info *r = (struct ipt_hashlimit_info *) matchinfo; + struct ipt_hashlimit_info *r = matchinfo; htable_put(r->hinfo); } diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index b847ee409efb..61a2139f9cfd 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -1,1007 +1,499 @@ -/* Kernel module to check if the source address has been seen recently. */ -/* Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org */ -/* Author: Stephen Frost <sfrost@snowman.net> */ -/* Project Page: http://snowman.net/projects/ipt_recent/ */ -/* This software is distributed under the terms of the GPL, Version 2 */ -/* This copyright does not cover user programs that use kernel services - * by normal system calls. */ - -#include <linux/module.h> -#include <linux/skbuff.h> +/* + * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is a replacement of the old ipt_recent module, which carried the + * following copyright notice: + * + * Author: Stephen Frost <sfrost@snowman.net> + * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org + */ +#include <linux/init.h> +#include <linux/moduleparam.h> #include <linux/proc_fs.h> -#include <linux/spinlock.h> -#include <linux/interrupt.h> -#include <asm/uaccess.h> +#include <linux/seq_file.h> +#include <linux/string.h> #include <linux/ctype.h> -#include <linux/ip.h> -#include <linux/vmalloc.h> -#include <linux/moduleparam.h> +#include <linux/list.h> +#include <linux/random.h> +#include <linux/jhash.h> +#include <linux/bitops.h> +#include <linux/skbuff.h> +#include <linux/inet.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_recent.h> -#undef DEBUG -#define HASH_LOG 9 +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("IP tables recently seen matching module"); +MODULE_LICENSE("GPL"); -/* Defaults, these can be overridden on the module command-line. */ static unsigned int ip_list_tot = 100; static unsigned int ip_pkt_list_tot = 20; static unsigned int ip_list_hash_size = 0; static unsigned int ip_list_perms = 0644; -#ifdef DEBUG -static int debug = 1; -#endif - -static char version[] = -KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. http://snowman.net/projects/ipt_recent/\n"; - -MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>"); -MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER); -MODULE_LICENSE("GPL"); module_param(ip_list_tot, uint, 0400); module_param(ip_pkt_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); -#ifdef DEBUG -module_param(debug, bool, 0600); -MODULE_PARM_DESC(debug,"enable debugging output"); -#endif -MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list"); -MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember"); -MODULE_PARM_DESC(ip_list_hash_size,"size of hash table used to look up IPs"); -MODULE_PARM_DESC(ip_list_perms,"permissions on /proc/net/ipt_recent/* files"); - -/* Structure of our list of recently seen addresses. */ -struct recent_ip_list { - u_int32_t addr; - u_int8_t ttl; - unsigned long last_seen; - unsigned long *last_pkts; - u_int32_t oldest_pkt; - u_int32_t hash_entry; - u_int32_t time_pos; -}; - -struct time_info_list { - u_int32_t position; - u_int32_t time; +MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); +MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); +MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); +MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); + + +struct recent_entry { + struct list_head list; + struct list_head lru_list; + u_int32_t addr; + u_int8_t ttl; + u_int8_t index; + u_int16_t nstamps; + unsigned long stamps[0]; }; -/* Structure of our linked list of tables of recent lists. */ -struct recent_ip_tables { - char name[IPT_RECENT_NAME_LEN]; - int count; - int time_pos; - struct recent_ip_list *table; - struct recent_ip_tables *next; - spinlock_t list_lock; - int *hash_table; - struct time_info_list *time_info; +struct recent_table { + struct list_head list; + char name[IPT_RECENT_NAME_LEN]; #ifdef CONFIG_PROC_FS - struct proc_dir_entry *status_proc; -#endif /* CONFIG_PROC_FS */ + struct proc_dir_entry *proc; +#endif + unsigned int refcnt; + unsigned int entries; + struct list_head lru_list; + struct list_head iphash[0]; }; -/* Our current list of addresses we have recently seen. - * Only added to on a --set, and only updated on --set || --update - */ -static struct recent_ip_tables *r_tables = NULL; - -/* We protect r_list with this spinlock so two processors are not modifying - * the list at the same time. - */ +static LIST_HEAD(tables); static DEFINE_SPINLOCK(recent_lock); +static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS -/* Our /proc/net/ipt_recent entry */ -static struct proc_dir_entry *proc_net_ipt_recent = NULL; -#endif - -/* Function declaration for later. */ -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop); - -/* Function to hash a given address into the hash table of table_size size */ -static int hash_func(unsigned int addr, int table_size) -{ - int result = 0; - unsigned int value = addr; - do { result ^= value; } while((value >>= HASH_LOG)); - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": %d = hash_func(%u,%d)\n", - result & (table_size - 1), - addr, - table_size); +static struct proc_dir_entry *proc_dir; +static struct file_operations recent_fops; #endif - return(result & (table_size - 1)); -} +static u_int32_t hash_rnd; +static int hash_rnd_initted; -#ifdef CONFIG_PROC_FS -/* This is the function which produces the output for our /proc output - * interface which lists each IP address, the last seen time and the - * other recent times the address was seen. - */ - -static int ip_recent_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data) +static unsigned int recent_entry_hash(u_int32_t addr) { - int len = 0, count, last_len = 0, pkt_count; - off_t pos = 0; - off_t begin = 0; - struct recent_ip_tables *curr_table; - - curr_table = (struct recent_ip_tables*) data; - - spin_lock_bh(&curr_table->list_lock); - for(count = 0; count < ip_list_tot; count++) { - if(!curr_table->table[count].addr) continue; - last_len = len; - len += sprintf(buffer+len,"src=%u.%u.%u.%u ",NIPQUAD(curr_table->table[count].addr)); - len += sprintf(buffer+len,"ttl: %u ",curr_table->table[count].ttl); - len += sprintf(buffer+len,"last_seen: %lu ",curr_table->table[count].last_seen); - len += sprintf(buffer+len,"oldest_pkt: %u ",curr_table->table[count].oldest_pkt); - len += sprintf(buffer+len,"last_pkts: %lu",curr_table->table[count].last_pkts[0]); - for(pkt_count = 1; pkt_count < ip_pkt_list_tot; pkt_count++) { - if(!curr_table->table[count].last_pkts[pkt_count]) break; - len += sprintf(buffer+len,", %lu",curr_table->table[count].last_pkts[pkt_count]); - } - len += sprintf(buffer+len,"\n"); - pos = begin + len; - if(pos < offset) { len = 0; begin = pos; } - if(pos > offset + length) { len = last_len; break; } + if (!hash_rnd_initted) { + get_random_bytes(&hash_rnd, 4); + hash_rnd_initted = 1; } - - *start = buffer + (offset - begin); - len -= (offset - begin); - if(len > length) len = length; - - spin_unlock_bh(&curr_table->list_lock); - return len; + return jhash_1word(addr, hash_rnd) & (ip_list_hash_size - 1); } -/* ip_recent_ctrl provides an interface for users to modify the table - * directly. This allows adding entries, removing entries, and - * flushing the entire table. - * This is done by opening up the appropriate table for writing and - * sending one of: - * xx.xx.xx.xx -- Add entry to table with current time - * +xx.xx.xx.xx -- Add entry to table with current time - * -xx.xx.xx.xx -- Remove entry from table - * clear -- Flush table, remove all entries - */ - -static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned long size, void *data) +static struct recent_entry * +recent_entry_lookup(const struct recent_table *table, u_int32_t addr, u_int8_t ttl) { - static const u_int32_t max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff }; - u_int32_t val; - int base, used = 0; - char c, *cp; - union iaddr { - uint8_t bytes[4]; - uint32_t word; - } res; - uint8_t *pp = res.bytes; - int digit; - - char buffer[20]; - int len, check_set = 0, count; - u_int32_t addr = 0; - struct sk_buff *skb; - struct ipt_recent_info *info; - struct recent_ip_tables *curr_table; - - curr_table = (struct recent_ip_tables*) data; - - if(size > 20) len = 20; else len = size; - - if(copy_from_user(buffer,input,len)) return -EFAULT; - - if(len < 20) buffer[len] = '\0'; - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl len: %d, input: `%.20s'\n",len,buffer); -#endif + struct recent_entry *e; + unsigned int h; + + h = recent_entry_hash(addr); + list_for_each_entry(e, &table->iphash[h], list) + if (e->addr == addr && (ttl == e->ttl || !ttl || !e->ttl)) + return e; + return NULL; +} - cp = buffer; - while(isspace(*cp)) { cp++; used++; if(used >= len-5) return used; } +static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) +{ + list_del(&e->list); + list_del(&e->lru_list); + kfree(e); + t->entries--; +} - /* Check if we are asked to flush the entire table */ - if(!memcmp(cp,"clear",5)) { - used += 5; - spin_lock_bh(&curr_table->list_lock); - curr_table->time_pos = 0; - for(count = 0; count < ip_list_hash_size; count++) { - curr_table->hash_table[count] = -1; - } - for(count = 0; count < ip_list_tot; count++) { - curr_table->table[count].last_seen = 0; - curr_table->table[count].addr = 0; - curr_table->table[count].ttl = 0; - memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long)); - curr_table->table[count].oldest_pkt = 0; - curr_table->table[count].time_pos = 0; - curr_table->time_info[count].position = count; - curr_table->time_info[count].time = 0; - } - spin_unlock_bh(&curr_table->list_lock); - return used; - } +static struct recent_entry * +recent_entry_init(struct recent_table *t, u_int32_t addr, u_int8_t ttl) +{ + struct recent_entry *e; - check_set = IPT_RECENT_SET; - switch(*cp) { - case '+': check_set = IPT_RECENT_SET; cp++; used++; break; - case '-': check_set = IPT_RECENT_REMOVE; cp++; used++; break; - default: if(!isdigit(*cp)) return (used+1); break; + if (t->entries >= ip_list_tot) { + e = list_entry(t->lru_list.next, struct recent_entry, lru_list); + recent_entry_remove(t, e); } + e = kmalloc(sizeof(*e) + sizeof(e->stamps[0]) * ip_pkt_list_tot, + GFP_ATOMIC); + if (e == NULL) + return NULL; + e->addr = addr; + e->ttl = ttl; + e->stamps[0] = jiffies; + e->nstamps = 1; + e->index = 1; + list_add_tail(&e->list, &t->iphash[recent_entry_hash(addr)]); + list_add_tail(&e->lru_list, &t->lru_list); + t->entries++; + return e; +} -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl cp: `%c', check_set: %d\n",*cp,check_set); -#endif - /* Get addr (effectively inet_aton()) */ - /* Shamelessly stolen from libc, a function in the kernel for doing - * this would, of course, be greatly preferred, but our options appear - * to be rather limited, so we will just do it ourselves here. - */ - res.word = 0; - - c = *cp; - for(;;) { - if(!isdigit(c)) return used; - val = 0; base = 10; digit = 0; - if(c == '0') { - c = *++cp; - if(c == 'x' || c == 'X') base = 16, c = *++cp; - else { base = 8; digit = 1; } - } - for(;;) { - if(isascii(c) && isdigit(c)) { - if(base == 8 && (c == '8' || c == '0')) return used; - val = (val * base) + (c - '0'); - c = *++cp; - digit = 1; - } else if(base == 16 && isascii(c) && isxdigit(c)) { - val = (val << 4) | (c + 10 - (islower(c) ? 'a' : 'A')); - c = *++cp; - digit = 1; - } else break; - } - if(c == '.') { - if(pp > res.bytes + 2 || val > 0xff) return used; - *pp++ = val; - c = *++cp; - } else break; - } - used = cp - buffer; - if(c != '\0' && (!isascii(c) || !isspace(c))) return used; - if(c == '\n') used++; - if(!digit) return used; +static void recent_entry_update(struct recent_table *t, struct recent_entry *e) +{ + e->stamps[e->index++] = jiffies; + if (e->index > e->nstamps) + e->nstamps = e->index; + e->index %= ip_pkt_list_tot; + list_move_tail(&e->lru_list, &t->lru_list); +} - if(val > max[pp - res.bytes]) return used; - addr = res.word | htonl(val); +static struct recent_table *recent_table_lookup(const char *name) +{ + struct recent_table *t; - if(!addr && check_set == IPT_RECENT_SET) return used; + list_for_each_entry(t, &tables, list) + if (!strcmp(t->name, name)) + return t; + return NULL; +} -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl c: %c, addr: %u used: %d\n",c,addr,used); -#endif +static void recent_table_flush(struct recent_table *t) +{ + struct recent_entry *e, *next; + unsigned int i; - /* Set up and just call match */ - info = kmalloc(sizeof(struct ipt_recent_info),GFP_KERNEL); - if(!info) { return -ENOMEM; } - info->seconds = 0; - info->hit_count = 0; - info->check_set = check_set; - info->invert = 0; - info->side = IPT_RECENT_SOURCE; - strncpy(info->name,curr_table->name,IPT_RECENT_NAME_LEN); - info->name[IPT_RECENT_NAME_LEN-1] = '\0'; - - skb = kmalloc(sizeof(struct sk_buff),GFP_KERNEL); - if (!skb) { - used = -ENOMEM; - goto out_free_info; - } - skb->nh.iph = kmalloc(sizeof(struct iphdr),GFP_KERNEL); - if (!skb->nh.iph) { - used = -ENOMEM; - goto out_free_skb; + for (i = 0; i < ip_list_hash_size; i++) { + list_for_each_entry_safe(e, next, &t->iphash[i], list) + recent_entry_remove(t, e); } - - skb->nh.iph->saddr = addr; - skb->nh.iph->daddr = 0; - /* Clear ttl since we have no way of knowing it */ - skb->nh.iph->ttl = 0; - match(skb,NULL,NULL,NULL,info,0,0,NULL); - - kfree(skb->nh.iph); -out_free_skb: - kfree(skb); -out_free_info: - kfree(info); - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": Leaving ip_recent_ctrl addr: %u used: %d\n",addr,used); -#endif - return used; } -#endif /* CONFIG_PROC_FS */ - -/* 'match' is our primary function, called by the kernel whenever a rule is - * hit with our module as an option to it. - * What this function does depends on what was specifically asked of it by - * the user: - * --set -- Add or update last seen time of the source address of the packet - * -- matchinfo->check_set == IPT_RECENT_SET - * --rcheck -- Just check if the source address is in the list - * -- matchinfo->check_set == IPT_RECENT_CHECK - * --update -- If the source address is in the list, update last_seen - * -- matchinfo->check_set == IPT_RECENT_UPDATE - * --remove -- If the source address is in the list, remove it - * -- matchinfo->check_set == IPT_RECENT_REMOVE - * --seconds -- Option to --rcheck/--update, only match if last_seen within seconds - * -- matchinfo->seconds - * --hitcount -- Option to --rcheck/--update, only match if seen hitcount times - * -- matchinfo->hit_count - * --seconds and --hitcount can be combined - */ static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) +ipt_recent_match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, int *hotdrop) { - int pkt_count, hits_found, ans; - unsigned long now; const struct ipt_recent_info *info = matchinfo; - u_int32_t addr = 0, time_temp; - u_int8_t ttl = skb->nh.iph->ttl; - int *hash_table; - int orig_hash_result, hash_result, temp, location = 0, time_loc, end_collision_chain = -1; - struct time_info_list *time_info; - struct recent_ip_tables *curr_table; - struct recent_ip_tables *last_table; - struct recent_ip_list *r_list; - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match() called\n"); -#endif - - /* Default is false ^ info->invert */ - ans = info->invert; + struct recent_table *t; + struct recent_entry *e; + u_int32_t addr; + u_int8_t ttl; + int ret = info->invert; -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): name = '%s'\n",info->name); -#endif + if (info->side == IPT_RECENT_DEST) + addr = skb->nh.iph->daddr; + else + addr = skb->nh.iph->saddr; - /* if out != NULL then routing has been done and TTL changed. - * We change it back here internally for match what came in before routing. */ - if(out) ttl++; + ttl = skb->nh.iph->ttl; + /* use TTL as seen before forwarding */ + if (out && !skb->sk) + ttl++; - /* Find the right table */ spin_lock_bh(&recent_lock); - curr_table = r_tables; - while( (last_table = curr_table) && strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (curr_table = curr_table->next) ); - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): table found('%s')\n",info->name); -#endif - - spin_unlock_bh(&recent_lock); - - /* Table with this name not found, match impossible */ - if(!curr_table) { return ans; } - - /* Make sure no one is changing the list while we work with it */ - spin_lock_bh(&curr_table->list_lock); - - r_list = curr_table->table; - if(info->side == IPT_RECENT_DEST) addr = skb->nh.iph->daddr; else addr = skb->nh.iph->saddr; - - if(!addr) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match() address (%u) invalid, leaving.\n",addr); -#endif - spin_unlock_bh(&curr_table->list_lock); - return ans; - } - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): checking table, addr: %u, ttl: %u, orig_ttl: %u\n",addr,ttl,skb->nh.iph->ttl); -#endif - - /* Get jiffies now in case they changed while we were waiting for a lock */ - now = jiffies; - hash_table = curr_table->hash_table; - time_info = curr_table->time_info; - - orig_hash_result = hash_result = hash_func(addr,ip_list_hash_size); - /* Hash entry at this result used */ - /* Check for TTL match if requested. If TTL is zero then a match would never - * happen, so match regardless of existing TTL in that case. Zero means the - * entry was added via the /proc interface anyway, so we will just use the - * first TTL we get for that IP address. */ - if(info->check_set & IPT_RECENT_TTL) { - while(hash_table[hash_result] != -1 && !(r_list[hash_table[hash_result]].addr == addr && - (!r_list[hash_table[hash_result]].ttl || r_list[hash_table[hash_result]].ttl == ttl))) { - /* Collision in hash table */ - hash_result = (hash_result + 1) % ip_list_hash_size; - } - } else { - while(hash_table[hash_result] != -1 && r_list[hash_table[hash_result]].addr != addr) { - /* Collision in hash table */ - hash_result = (hash_result + 1) % ip_list_hash_size; - } - } - - if(hash_table[hash_result] == -1 && !(info->check_set & IPT_RECENT_SET)) { - /* IP not in list and not asked to SET */ - spin_unlock_bh(&curr_table->list_lock); - return ans; - } - - /* Check if we need to handle the collision, do not need to on REMOVE */ - if(orig_hash_result != hash_result && !(info->check_set & IPT_RECENT_REMOVE)) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision in hash table. (or: %d,hr: %d,oa: %u,ha: %u)\n", - orig_hash_result, - hash_result, - r_list[hash_table[orig_hash_result]].addr, - addr); -#endif - - /* We had a collision. - * orig_hash_result is where we started, hash_result is where we ended up. - * So, swap them because we are likely to see the same guy again sooner */ -#ifdef DEBUG - if(debug) { - printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[orig_hash_result] = %d\n",hash_table[orig_hash_result]); - printk(KERN_INFO RECENT_NAME ": match(): Collision; r_list[hash_table[orig_hash_result]].hash_entry = %d\n", - r_list[hash_table[orig_hash_result]].hash_entry); - } -#endif - - r_list[hash_table[orig_hash_result]].hash_entry = hash_result; - - - temp = hash_table[orig_hash_result]; -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[hash_result] = %d\n",hash_table[hash_result]); -#endif - hash_table[orig_hash_result] = hash_table[hash_result]; - hash_table[hash_result] = temp; - temp = hash_result; - hash_result = orig_hash_result; - orig_hash_result = temp; - time_info[r_list[hash_table[orig_hash_result]].time_pos].position = hash_table[orig_hash_result]; - if(hash_table[hash_result] != -1) { - r_list[hash_table[hash_result]].hash_entry = hash_result; - time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result]; - } - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision handled.\n"); -#endif + t = recent_table_lookup(info->name); + e = recent_entry_lookup(t, addr, + info->check_set & IPT_RECENT_TTL ? ttl : 0); + if (e == NULL) { + if (!(info->check_set & IPT_RECENT_SET)) + goto out; + e = recent_entry_init(t, addr, ttl); + if (e == NULL) + *hotdrop = 1; + ret ^= 1; + goto out; } - if(hash_table[hash_result] == -1) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): New table entry. (hr: %d,ha: %u)\n", - hash_result, addr); -#endif - - /* New item found and IPT_RECENT_SET, so we need to add it */ - location = time_info[curr_table->time_pos].position; - hash_table[r_list[location].hash_entry] = -1; - hash_table[hash_result] = location; - memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long)); - r_list[location].time_pos = curr_table->time_pos; - r_list[location].addr = addr; - r_list[location].ttl = ttl; - r_list[location].last_seen = now; - r_list[location].oldest_pkt = 1; - r_list[location].last_pkts[0] = now; - r_list[location].hash_entry = hash_result; - time_info[curr_table->time_pos].time = r_list[location].last_seen; - curr_table->time_pos = (curr_table->time_pos + 1) % ip_list_tot; - - ans = !info->invert; - } else { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): Existing table entry. (hr: %d,ha: %u)\n", - hash_result, - addr); -#endif - - /* Existing item found */ - location = hash_table[hash_result]; - /* We have a match on address, now to make sure it meets all requirements for a - * full match. */ - if(info->check_set & IPT_RECENT_CHECK || info->check_set & IPT_RECENT_UPDATE) { - if(!info->seconds && !info->hit_count) ans = !info->invert; else ans = info->invert; - if(info->seconds && !info->hit_count) { - if(time_before_eq(now,r_list[location].last_seen+info->seconds*HZ)) ans = !info->invert; else ans = info->invert; - } - if(info->seconds && info->hit_count) { - for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) { - if(r_list[location].last_pkts[pkt_count] == 0) break; - if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++; - } - if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert; - } - if(info->hit_count && !info->seconds) { - for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) { - if(r_list[location].last_pkts[pkt_count] == 0) break; - hits_found++; - } - if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert; + if (info->check_set & IPT_RECENT_SET) + ret ^= 1; + else if (info->check_set & IPT_RECENT_REMOVE) { + recent_entry_remove(t, e); + ret ^= 1; + } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { + unsigned long t = jiffies - info->seconds * HZ; + unsigned int i, hits = 0; + + for (i = 0; i < e->nstamps; i++) { + if (info->seconds && time_after(t, e->stamps[i])) + continue; + if (++hits >= info->hit_count) { + ret ^= 1; + break; } } -#ifdef DEBUG - if(debug) { - if(ans) - printk(KERN_INFO RECENT_NAME ": match(): match addr: %u\n",addr); - else - printk(KERN_INFO RECENT_NAME ": match(): no match addr: %u\n",addr); - } -#endif - - /* If and only if we have been asked to SET, or to UPDATE (on match) do we add the - * current timestamp to the last_seen. */ - if((info->check_set & IPT_RECENT_SET && (ans = !info->invert)) || (info->check_set & IPT_RECENT_UPDATE && ans)) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): SET or UPDATE; updating time info.\n"); -#endif - /* Have to update our time info */ - time_loc = r_list[location].time_pos; - time_info[time_loc].time = now; - time_info[time_loc].position = location; - while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) { - time_temp = time_info[time_loc].time; - time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time; - time_info[(time_loc+1)%ip_list_tot].time = time_temp; - time_temp = time_info[time_loc].position; - time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position; - time_info[(time_loc+1)%ip_list_tot].position = time_temp; - r_list[time_info[time_loc].position].time_pos = time_loc; - r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot; - time_loc = (time_loc+1) % ip_list_tot; - } - r_list[location].time_pos = time_loc; - r_list[location].ttl = ttl; - r_list[location].last_pkts[r_list[location].oldest_pkt] = now; - r_list[location].oldest_pkt = ++r_list[location].oldest_pkt % ip_pkt_list_tot; - r_list[location].last_seen = now; - } - /* If we have been asked to remove the entry from the list, just set it to 0 */ - if(info->check_set & IPT_RECENT_REMOVE) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; clearing entry (or: %d, hr: %d).\n",orig_hash_result,hash_result); -#endif - /* Check if this is part of a collision chain */ - while(hash_table[(orig_hash_result+1) % ip_list_hash_size] != -1) { - orig_hash_result++; - if(hash_func(r_list[hash_table[orig_hash_result]].addr,ip_list_hash_size) == hash_result) { - /* Found collision chain, how deep does this rabbit hole go? */ -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; found collision chain.\n"); -#endif - end_collision_chain = orig_hash_result; - } - } - if(end_collision_chain != -1) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; part of collision chain, moving to end.\n"); -#endif - /* Part of a collision chain, swap it with the end of the chain - * before removing. */ - r_list[hash_table[end_collision_chain]].hash_entry = hash_result; - temp = hash_table[end_collision_chain]; - hash_table[end_collision_chain] = hash_table[hash_result]; - hash_table[hash_result] = temp; - time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result]; - hash_result = end_collision_chain; - r_list[hash_table[hash_result]].hash_entry = hash_result; - time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result]; - } - location = hash_table[hash_result]; - hash_table[r_list[location].hash_entry] = -1; - time_loc = r_list[location].time_pos; - time_info[time_loc].time = 0; - time_info[time_loc].position = location; - while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) { - time_temp = time_info[time_loc].time; - time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time; - time_info[(time_loc+1)%ip_list_tot].time = time_temp; - time_temp = time_info[time_loc].position; - time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position; - time_info[(time_loc+1)%ip_list_tot].position = time_temp; - r_list[time_info[time_loc].position].time_pos = time_loc; - r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot; - time_loc = (time_loc+1) % ip_list_tot; - } - r_list[location].time_pos = time_loc; - r_list[location].last_seen = 0; - r_list[location].addr = 0; - r_list[location].ttl = 0; - memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long)); - r_list[location].oldest_pkt = 0; - ans = !info->invert; - } - spin_unlock_bh(&curr_table->list_lock); - return ans; } - spin_unlock_bh(&curr_table->list_lock); -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match() left.\n"); -#endif - return ans; + if (info->check_set & IPT_RECENT_SET || + (info->check_set & IPT_RECENT_UPDATE && ret)) { + recent_entry_update(t, e); + e->ttl = ttl; + } +out: + spin_unlock_bh(&recent_lock); + return ret; } -/* This function is to verify that the rule given during the userspace iptables - * command is correct. - * If the command is valid then we check if the table name referred to by the - * rule exists, if not it is created. - */ static int -checkentry(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) +ipt_recent_checkentry(const char *tablename, const void *ip, + const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { - int flag = 0, c; - unsigned long *hold; const struct ipt_recent_info *info = matchinfo; - struct recent_ip_tables *curr_table, *find_table, *last_table; - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() entered.\n"); -#endif - - /* seconds and hit_count only valid for CHECK/UPDATE */ - if(info->check_set & IPT_RECENT_SET) { flag++; if(info->seconds || info->hit_count) return 0; } - if(info->check_set & IPT_RECENT_REMOVE) { flag++; if(info->seconds || info->hit_count) return 0; } - if(info->check_set & IPT_RECENT_CHECK) flag++; - if(info->check_set & IPT_RECENT_UPDATE) flag++; - - /* One and only one of these should ever be set */ - if(flag != 1) return 0; - - /* Name must be set to something */ - if(!info->name || !info->name[0]) return 0; + struct recent_table *t; + unsigned i; + int ret = 0; - /* Things look good, create a list for this if it does not exist */ - /* Lock the linked list while we play with it */ - spin_lock_bh(&recent_lock); - - /* Look for an entry with this name already created */ - /* Finds the end of the list and the entry before the end if current name does not exist */ - find_table = r_tables; - while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) ); + if (hweight8(info->check_set & + (IPT_RECENT_SET | IPT_RECENT_REMOVE | + IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) != 1) + return 0; + if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) && + (info->seconds || info->hit_count)) + return 0; + if (info->name[0] == '\0' || + strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN) + return 0; - /* If a table already exists just increment the count on that table and return */ - if(find_table) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), incrementing count.\n",info->name); -#endif - find_table->count++; - spin_unlock_bh(&recent_lock); - return 1; + mutex_lock(&recent_mutex); + t = recent_table_lookup(info->name); + if (t != NULL) { + t->refcnt++; + ret = 1; + goto out; } - spin_unlock_bh(&recent_lock); - - /* Table with this name not found */ - /* Allocate memory for new linked list item */ - -#ifdef DEBUG - if(debug) { - printk(KERN_INFO RECENT_NAME ": checkentry: no table found (%s)\n",info->name); - printk(KERN_INFO RECENT_NAME ": checkentry: Allocationg %d for link-list entry.\n",sizeof(struct recent_ip_tables)); + t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size, + GFP_KERNEL); + if (t == NULL) + goto out; + t->refcnt = 1; + strcpy(t->name, info->name); + INIT_LIST_HEAD(&t->lru_list); + for (i = 0; i < ip_list_hash_size; i++) + INIT_LIST_HEAD(&t->iphash[i]); +#ifdef CONFIG_PROC_FS + t->proc = create_proc_entry(t->name, ip_list_perms, proc_dir); + if (t->proc == NULL) { + kfree(t); + goto out; } + t->proc->proc_fops = &recent_fops; + t->proc->data = t; #endif + spin_lock_bh(&recent_lock); + list_add_tail(&t->list, &tables); + spin_unlock_bh(&recent_lock); + ret = 1; +out: + mutex_unlock(&recent_mutex); + return ret; +} - curr_table = vmalloc(sizeof(struct recent_ip_tables)); - if(curr_table == NULL) return 0; - - spin_lock_init(&curr_table->list_lock); - curr_table->next = NULL; - curr_table->count = 1; - curr_table->time_pos = 0; - strncpy(curr_table->name,info->name,IPT_RECENT_NAME_LEN); - curr_table->name[IPT_RECENT_NAME_LEN-1] = '\0'; - - /* Allocate memory for this table and the list of packets in each entry. */ -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for table (%s).\n", - sizeof(struct recent_ip_list)*ip_list_tot, - info->name); -#endif - - curr_table->table = vmalloc(sizeof(struct recent_ip_list)*ip_list_tot); - if(curr_table->table == NULL) { vfree(curr_table); return 0; } - memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot); -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n", - sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot); -#endif - - hold = vmalloc(sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot); -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n"); -#endif - if(hold == NULL) { - printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for pkt_list.\n"); - vfree(curr_table->table); - vfree(curr_table); - return 0; - } - for(c = 0; c < ip_list_tot; c++) { - curr_table->table[c].last_pkts = hold + c*ip_pkt_list_tot; - } +static void +ipt_recent_destroy(const struct xt_match *match, void *matchinfo, + unsigned int matchsize) +{ + const struct ipt_recent_info *info = matchinfo; + struct recent_table *t; - /* Allocate memory for the hash table */ -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for hash_table.\n", - sizeof(int)*ip_list_hash_size); + mutex_lock(&recent_mutex); + t = recent_table_lookup(info->name); + if (--t->refcnt == 0) { + spin_lock_bh(&recent_lock); + list_del(&t->list); + spin_unlock_bh(&recent_lock); + recent_table_flush(t); +#ifdef CONFIG_PROC_FS + remove_proc_entry(t->name, proc_dir); #endif - - curr_table->hash_table = vmalloc(sizeof(int)*ip_list_hash_size); - if(!curr_table->hash_table) { - printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for hash_table.\n"); - vfree(hold); - vfree(curr_table->table); - vfree(curr_table); - return 0; - } - - for(c = 0; c < ip_list_hash_size; c++) { - curr_table->hash_table[c] = -1; + kfree(t); } + mutex_unlock(&recent_mutex); +} - /* Allocate memory for the time info */ -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for time_info.\n", - sizeof(struct time_info_list)*ip_list_tot); -#endif +#ifdef CONFIG_PROC_FS +struct recent_iter_state { + struct recent_table *table; + unsigned int bucket; +}; - curr_table->time_info = vmalloc(sizeof(struct time_info_list)*ip_list_tot); - if(!curr_table->time_info) { - printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for time_info.\n"); - vfree(curr_table->hash_table); - vfree(hold); - vfree(curr_table->table); - vfree(curr_table); - return 0; - } - for(c = 0; c < ip_list_tot; c++) { - curr_table->time_info[c].position = c; - curr_table->time_info[c].time = 0; - } +static void *recent_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct recent_iter_state *st = seq->private; + struct recent_table *t = st->table; + struct recent_entry *e; + loff_t p = *pos; - /* Put the new table in place */ spin_lock_bh(&recent_lock); - find_table = r_tables; - while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) ); - - /* If a table already exists just increment the count on that table and return */ - if(find_table) { - find_table->count++; - spin_unlock_bh(&recent_lock); -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), created by other process.\n",info->name); -#endif - vfree(curr_table->time_info); - vfree(curr_table->hash_table); - vfree(hold); - vfree(curr_table->table); - vfree(curr_table); - return 1; - } - if(!last_table) r_tables = curr_table; else last_table->next = curr_table; - - spin_unlock_bh(&recent_lock); -#ifdef CONFIG_PROC_FS - /* Create our proc 'status' entry. */ - curr_table->status_proc = create_proc_entry(curr_table->name, ip_list_perms, proc_net_ipt_recent); - if (!curr_table->status_proc) { - vfree(hold); - printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for /proc entry.\n"); - /* Destroy the created table */ - spin_lock_bh(&recent_lock); - last_table = NULL; - curr_table = r_tables; - if(!curr_table) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, no tables.\n"); -#endif - spin_unlock_bh(&recent_lock); - return 0; - } - while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) ); - if(!curr_table) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, table already destroyed.\n"); -#endif - spin_unlock_bh(&recent_lock); - return 0; + for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) { + list_for_each_entry(e, &t->iphash[st->bucket], list) { + if (p-- == 0) + return e; } - if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next; - spin_unlock_bh(&recent_lock); - vfree(curr_table->time_info); - vfree(curr_table->hash_table); - vfree(curr_table->table); - vfree(curr_table); - return 0; } - - curr_table->status_proc->owner = THIS_MODULE; - curr_table->status_proc->data = curr_table; - wmb(); - curr_table->status_proc->read_proc = ip_recent_get_info; - curr_table->status_proc->write_proc = ip_recent_ctrl; -#endif /* CONFIG_PROC_FS */ - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() left.\n"); -#endif + return NULL; +} - return 1; +static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct recent_iter_state *st = seq->private; + struct recent_table *t = st->table; + struct recent_entry *e = v; + struct list_head *head = e->list.next; + + while (head == &t->iphash[st->bucket]) { + if (++st->bucket >= ip_list_hash_size) + return NULL; + head = t->iphash[st->bucket].next; + } + (*pos)++; + return list_entry(head, struct recent_entry, list); } -/* This function is called in the event that a rule matching this module is - * removed. - * When this happens we need to check if there are no other rules matching - * the table given. If that is the case then we remove the table and clean - * up its memory. - */ -static void -destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) +static void recent_seq_stop(struct seq_file *s, void *v) { - const struct ipt_recent_info *info = matchinfo; - struct recent_ip_tables *curr_table, *last_table; + spin_unlock_bh(&recent_lock); +} -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() entered.\n"); -#endif +static int recent_seq_show(struct seq_file *seq, void *v) +{ + struct recent_entry *e = v; + unsigned int i; + + i = (e->index - 1) % ip_pkt_list_tot; + seq_printf(seq, "src=%u.%u.%u.%u ttl: %u last_seen: %lu oldest_pkt: %u", + NIPQUAD(e->addr), e->ttl, e->stamps[i], e->index); + for (i = 0; i < e->nstamps; i++) + seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); + seq_printf(seq, "\n"); + return 0; +} - if(matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info))) return; +static struct seq_operations recent_seq_ops = { + .start = recent_seq_start, + .next = recent_seq_next, + .stop = recent_seq_stop, + .show = recent_seq_show, +}; - /* Lock the linked list while we play with it */ - spin_lock_bh(&recent_lock); +static int recent_seq_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *pde = PDE(inode); + struct seq_file *seq; + struct recent_iter_state *st; + int ret; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (st == NULL) + return -ENOMEM; + ret = seq_open(file, &recent_seq_ops); + if (ret) + kfree(st); + st->table = pde->data; + seq = file->private_data; + seq->private = st; + return ret; +} - /* Look for an entry with this name already created */ - /* Finds the end of the list and the entry before the end if current name does not exist */ - last_table = NULL; - curr_table = r_tables; - if(!curr_table) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() No tables found, leaving.\n"); -#endif +static ssize_t recent_proc_write(struct file *file, const char __user *input, + size_t size, loff_t *loff) +{ + struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode); + struct recent_table *t = pde->data; + struct recent_entry *e; + char buf[sizeof("+255.255.255.255")], *c = buf; + u_int32_t addr; + int add; + + if (size > sizeof(buf)) + size = sizeof(buf); + if (copy_from_user(buf, input, size)) + return -EFAULT; + while (isspace(*c)) + c++; + + if (size - (c - buf) < 5) + return c - buf; + if (!strncmp(c, "clear", 5)) { + c += 5; + spin_lock_bh(&recent_lock); + recent_table_flush(t); spin_unlock_bh(&recent_lock); - return; + return c - buf; } - while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) ); - /* If a table does not exist then do nothing and return */ - if(!curr_table) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table not found, leaving.\n"); -#endif - spin_unlock_bh(&recent_lock); - return; + switch (*c) { + case '-': + add = 0; + c++; + break; + case '+': + c++; + default: + add = 1; + break; } + addr = in_aton(c); - curr_table->count--; - - /* If count is still non-zero then there are still rules referenceing it so we do nothing */ - if(curr_table->count) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, non-zero count, leaving.\n"); -#endif - spin_unlock_bh(&recent_lock); - return; + spin_lock_bh(&recent_lock); + e = recent_entry_lookup(t, addr, 0); + if (e == NULL) { + if (add) + recent_entry_init(t, addr, 0); + } else { + if (add) + recent_entry_update(t, e); + else + recent_entry_remove(t, e); } - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, zero count, removing.\n"); -#endif - - /* Count must be zero so we remove this table from the list */ - if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next; - spin_unlock_bh(&recent_lock); + return size; +} - /* lock to make sure any late-runners still using this after we removed it from - * the list finish up then remove everything */ - spin_lock_bh(&curr_table->list_lock); - spin_unlock_bh(&curr_table->list_lock); - -#ifdef CONFIG_PROC_FS - if(curr_table->status_proc) remove_proc_entry(curr_table->name,proc_net_ipt_recent); +static struct file_operations recent_fops = { + .open = recent_seq_open, + .read = seq_read, + .write = recent_proc_write, + .release = seq_release_private, + .owner = THIS_MODULE, +}; #endif /* CONFIG_PROC_FS */ - vfree(curr_table->table[0].last_pkts); - vfree(curr_table->table); - vfree(curr_table->hash_table); - vfree(curr_table->time_info); - vfree(curr_table); - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() left.\n"); -#endif - return; -} - -/* This is the structure we pass to ipt_register to register our - * module with iptables. - */ static struct ipt_match recent_match = { .name = "recent", - .match = match, + .match = ipt_recent_match, .matchsize = sizeof(struct ipt_recent_info), - .checkentry = checkentry, - .destroy = destroy, - .me = THIS_MODULE + .checkentry = ipt_recent_checkentry, + .destroy = ipt_recent_destroy, + .me = THIS_MODULE, }; -/* Kernel module initialization. */ static int __init ipt_recent_init(void) { - int err, count; + int err; - printk(version); -#ifdef CONFIG_PROC_FS - proc_net_ipt_recent = proc_mkdir("ipt_recent",proc_net); - if(!proc_net_ipt_recent) return -ENOMEM; -#endif - - if(ip_list_hash_size && ip_list_hash_size <= ip_list_tot) { - printk(KERN_WARNING RECENT_NAME ": ip_list_hash_size too small, resetting to default.\n"); - ip_list_hash_size = 0; - } - - if(!ip_list_hash_size) { - ip_list_hash_size = ip_list_tot*3; - count = 2*2; - while(ip_list_hash_size > count) count = count*2; - ip_list_hash_size = count; - } - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": ip_list_hash_size: %d\n",ip_list_hash_size); -#endif + if (!ip_list_tot || !ip_pkt_list_tot || ip_pkt_list_tot > 255) + return -EINVAL; + ip_list_hash_size = 1 << fls(ip_list_tot); err = ipt_register_match(&recent_match); +#ifdef CONFIG_PROC_FS if (err) - remove_proc_entry("ipt_recent", proc_net); + return err; + proc_dir = proc_mkdir("ipt_recent", proc_net); + if (proc_dir == NULL) { + ipt_unregister_match(&recent_match); + err = -ENOMEM; + } +#endif return err; } -/* Kernel module destruction. */ -static void __exit ipt_recent_fini(void) +static void __exit ipt_recent_exit(void) { + BUG_ON(!list_empty(&tables)); ipt_unregister_match(&recent_match); - - remove_proc_entry("ipt_recent",proc_net); +#ifdef CONFIG_PROC_FS + remove_proc_entry("ipt_recent", proc_net); +#endif } -/* Register our module with the kernel. */ module_init(ipt_recent_init); -module_exit(ipt_recent_fini); +module_exit(ipt_recent_exit); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 397b95cc026b..4e7998beda63 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -10,7 +10,6 @@ * * Extended to all five netfilter hooks by Brad Chapman & Harald Welte */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netdevice.h> diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 77d974443c7b..0af803df82b0 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -14,7 +14,6 @@ * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c */ -#include <linux/config.h> #include <linux/types.h> #include <linux/ip.h> #include <linux/netfilter.h> @@ -145,7 +144,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(*pskb, &ctinfo); - if (!ct) + if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) return NF_ACCEPT; help = nfct_help(ct); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 4b0d361cc6e6..663a73ee3f2f 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -235,7 +235,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, } /* See ip_conntrack_proto_tcp.c */ - if (hooknum == NF_IP_PRE_ROUTING && + if (nf_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { if (LOG_INVALID(IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 291831e792af..05f5114828ea 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -32,7 +32,6 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> -#include <linux/config.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index fc2562415555..bd221ec3f81e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -103,7 +103,7 @@ static void raw_v4_unhash(struct sock *sk) } struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, - unsigned long raddr, unsigned long laddr, + __be32 raddr, __be32 laddr, int dif) { struct hlist_node *node; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cc9423de7311..2dc6dbb28467 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -64,7 +64,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -206,21 +205,27 @@ __u8 ip_tos2prio[16] = { struct rt_hash_bucket { struct rtable *chain; }; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ + defined(CONFIG_PROVE_LOCKING) /* * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks * The size of this table is a power of two and depends on the number of CPUS. + * (on lockdep we have a quite big spinlock_t, so keep the size down there) */ -#if NR_CPUS >= 32 -#define RT_HASH_LOCK_SZ 4096 -#elif NR_CPUS >= 16 -#define RT_HASH_LOCK_SZ 2048 -#elif NR_CPUS >= 8 -#define RT_HASH_LOCK_SZ 1024 -#elif NR_CPUS >= 4 -#define RT_HASH_LOCK_SZ 512 +#ifdef CONFIG_LOCKDEP +# define RT_HASH_LOCK_SZ 256 #else -#define RT_HASH_LOCK_SZ 256 +# if NR_CPUS >= 32 +# define RT_HASH_LOCK_SZ 4096 +# elif NR_CPUS >= 16 +# define RT_HASH_LOCK_SZ 2048 +# elif NR_CPUS >= 8 +# define RT_HASH_LOCK_SZ 1024 +# elif NR_CPUS >= 4 +# define RT_HASH_LOCK_SZ 512 +# else +# define RT_HASH_LOCK_SZ 256 +# endif #endif static spinlock_t *rt_hash_locks; @@ -244,7 +249,7 @@ static unsigned int rt_hash_rnd; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) \ - (per_cpu(rt_cache_stat, raw_smp_processor_id()).field++) + (__raw_get_cpu_var(rt_cache_stat).field++) static int rt_intern_hash(unsigned hash, struct rtable *rth, struct rtable **res); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6b6c3adfcf00..70cea9d08a38 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -10,7 +10,6 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/sysctl.h> -#include <linux/config.h> #include <linux/igmp.h> #include <linux/inetdevice.h> #include <net/snmp.h> @@ -182,14 +181,6 @@ ctl_table ipv4_table[] = { .strategy = &ipv4_doint_and_flush_strategy, }, { - .ctl_name = NET_IPV4_AUTOCONFIG, - .procname = "ip_autoconfig", - .data = &ipv4_config.autoconfig, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_IPV4_NO_PMTU_DISC, .procname = "ip_no_pmtu_disc", .data = &ipv4_config.no_pmtu_disc, @@ -688,6 +679,24 @@ ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, +#ifdef CONFIG_NET_DMA + { + .ctl_name = NET_TCP_DMA_COPYBREAK, + .procname = "tcp_dma_copybreak", + .data = &sysctl_tcp_dma_copybreak, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +#endif + { + .ctl_name = NET_TCP_SLOW_START_AFTER_IDLE, + .procname = "tcp_slow_start_after_idle", + .data = &sysctl_tcp_slow_start_after_idle, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, { .ctl_name = 0 } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e2b7b8055037..f6a2d9223d07 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -247,7 +247,6 @@ * TCP_CLOSE socket is finished */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/fcntl.h> @@ -258,12 +257,13 @@ #include <linux/random.h> #include <linux/bootmem.h> #include <linux/cache.h> +#include <linux/err.h> #include <net/icmp.h> #include <net/tcp.h> #include <net/xfrm.h> #include <net/ip.h> - +#include <net/netdma.h> #include <asm/uaccess.h> #include <asm/ioctls.h> @@ -571,7 +571,7 @@ new_segment: skb->ip_summed = CHECKSUM_HW; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; - skb_shinfo(skb)->tso_segs = 0; + skb_shinfo(skb)->gso_segs = 0; if (!copied) TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; @@ -622,14 +622,10 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, ssize_t res; struct sock *sk = sock->sk; -#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) - if (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) + !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) return sock_no_sendpage(sock, page, offset, size, flags); -#undef TCP_ZC_CSUM_FLAGS - lock_sock(sk); TCP_CHECK_TIMER(sk); res = do_tcp_sendpages(sk, &page, offset, size, flags); @@ -646,7 +642,7 @@ static inline int select_size(struct sock *sk, struct tcp_sock *tp) int tmp = tp->mss_cache; if (sk->sk_route_caps & NETIF_F_SG) { - if (sk->sk_route_caps & NETIF_F_TSO) + if (sk_can_gso(sk)) tmp = 0; else { int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); @@ -726,9 +722,7 @@ new_segment: /* * Check whether we can use HW checksum. */ - if (sk->sk_route_caps & - (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | - NETIF_F_HW_CSUM)) + if (sk->sk_route_caps & NETIF_F_ALL_CSUM) skb->ip_summed = CHECKSUM_HW; skb_entail(sk, tp, skb); @@ -824,7 +818,7 @@ new_segment: tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; - skb_shinfo(skb)->tso_segs = 0; + skb_shinfo(skb)->gso_segs = 0; from += copy; copied += copy; @@ -937,7 +931,7 @@ static int tcp_recv_urg(struct sock *sk, long timeo, * calculation of whether or not we must ACK for the sake of * a window update. */ -static void cleanup_rbuf(struct sock *sk, int copied) +void tcp_cleanup_rbuf(struct sock *sk, int copied) { struct tcp_sock *tp = tcp_sk(sk); int time_to_ack = 0; @@ -1072,11 +1066,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, break; } if (skb->h.th->fin) { - sk_eat_skb(sk, skb); + sk_eat_skb(sk, skb, 0); ++seq; break; } - sk_eat_skb(sk, skb); + sk_eat_skb(sk, skb, 0); if (!desc->count) break; } @@ -1086,7 +1080,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, /* Clean up data we have read: This will do ACK frames. */ if (copied) - cleanup_rbuf(sk, copied); + tcp_cleanup_rbuf(sk, copied); return copied; } @@ -1110,6 +1104,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; + int copied_early = 0; lock_sock(sk); @@ -1133,6 +1128,17 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); +#ifdef CONFIG_NET_DMA + tp->ucopy.dma_chan = NULL; + preempt_disable(); + if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && + !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) { + preempt_enable_no_resched(); + tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); + } else + preempt_enable_no_resched(); +#endif + do { struct sk_buff *skb; u32 offset; @@ -1220,7 +1226,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } } - cleanup_rbuf(sk, copied); + tcp_cleanup_rbuf(sk, copied); if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) { /* Install new reader */ @@ -1274,6 +1280,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } else sk_wait_data(sk, &timeo); +#ifdef CONFIG_NET_DMA + tp->ucopy.wakeup = 0; +#endif + if (user_recv) { int chunk; @@ -1329,13 +1339,39 @@ do_prequeue: } if (!(flags & MSG_TRUNC)) { - err = skb_copy_datagram_iovec(skb, offset, - msg->msg_iov, used); - if (err) { - /* Exception. Bailout! */ - if (!copied) - copied = -EFAULT; - break; +#ifdef CONFIG_NET_DMA + if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) + tp->ucopy.dma_chan = get_softnet_dma(); + + if (tp->ucopy.dma_chan) { + tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( + tp->ucopy.dma_chan, skb, offset, + msg->msg_iov, used, + tp->ucopy.pinned_list); + + if (tp->ucopy.dma_cookie < 0) { + + printk(KERN_ALERT "dma_cookie < 0\n"); + + /* Exception. Bailout! */ + if (!copied) + copied = -EFAULT; + break; + } + if ((offset + used) == skb->len) + copied_early = 1; + + } else +#endif + { + err = skb_copy_datagram_iovec(skb, offset, + msg->msg_iov, used); + if (err) { + /* Exception. Bailout! */ + if (!copied) + copied = -EFAULT; + break; + } } } @@ -1355,15 +1391,19 @@ skip_copy: if (skb->h.th->fin) goto found_fin_ok; - if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb); + if (!(flags & MSG_PEEK)) { + sk_eat_skb(sk, skb, copied_early); + copied_early = 0; + } continue; found_fin_ok: /* Process the FIN. */ ++*seq; - if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb); + if (!(flags & MSG_PEEK)) { + sk_eat_skb(sk, skb, copied_early); + copied_early = 0; + } break; } while (len > 0); @@ -1386,12 +1426,42 @@ skip_copy: tp->ucopy.len = 0; } +#ifdef CONFIG_NET_DMA + if (tp->ucopy.dma_chan) { + struct sk_buff *skb; + dma_cookie_t done, used; + + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + + while (dma_async_memcpy_complete(tp->ucopy.dma_chan, + tp->ucopy.dma_cookie, &done, + &used) == DMA_IN_PROGRESS) { + /* do partial cleanup of sk_async_wait_queue */ + while ((skb = skb_peek(&sk->sk_async_wait_queue)) && + (dma_async_is_complete(skb->dma_cookie, done, + used) == DMA_SUCCESS)) { + __skb_dequeue(&sk->sk_async_wait_queue); + kfree_skb(skb); + } + } + + /* Safe to free early-copied skbs now */ + __skb_queue_purge(&sk->sk_async_wait_queue); + dma_chan_put(tp->ucopy.dma_chan); + tp->ucopy.dma_chan = NULL; + } + if (tp->ucopy.pinned_list) { + dma_unpin_iovec_pages(tp->ucopy.pinned_list); + tp->ucopy.pinned_list = NULL; + } +#endif + /* According to UNIX98, msg_name/msg_namelen are ignored * on connected socket. I was just happy when found this 8) --ANK */ /* Clean up data we have read: This will do ACK frames. */ - cleanup_rbuf(sk, copied); + tcp_cleanup_rbuf(sk, copied); TCP_CHECK_TIMER(sk); release_sock(sk); @@ -1658,6 +1728,9 @@ int tcp_disconnect(struct sock *sk, int flags) __skb_queue_purge(&sk->sk_receive_queue); sk_stream_writequeue_purge(sk); __skb_queue_purge(&tp->out_of_order_queue); +#ifdef CONFIG_NET_DMA + __skb_queue_purge(&sk->sk_async_wait_queue); +#endif inet->dport = 0; @@ -1858,7 +1931,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && inet_csk_ack_scheduled(sk)) { icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; - cleanup_rbuf(sk, 1); + tcp_cleanup_rbuf(sk, 1); if (!(val & 1)) icsk->icsk_ack.pingpong = 1; } @@ -2071,6 +2144,89 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct tcphdr *th; + unsigned thlen; + unsigned int seq; + unsigned int delta; + unsigned int oldlen; + unsigned int len; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = skb->h.th; + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + + oldlen = (u16)~skb->len; + __skb_pull(skb, thlen); + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + int mss; + + if (unlikely(type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + 0) || + !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + goto out; + + mss = skb_shinfo(skb)->gso_size; + skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss; + + segs = NULL; + goto out; + } + + segs = skb_segment(skb, features); + if (IS_ERR(segs)) + goto out; + + len = skb_shinfo(skb)->gso_size; + delta = htonl(oldlen + (thlen + len)); + + skb = segs; + th = skb->h.th; + seq = ntohl(th->seq); + + do { + th->fin = th->psh = 0; + + th->check = ~csum_fold(th->check + delta); + if (skb->ip_summed != CHECKSUM_HW) + th->check = csum_fold(csum_partial(skb->h.raw, thlen, + skb->csum)); + + seq += len; + skb = skb->next; + th = skb->h.th; + + th->seq = htonl(seq); + th->cwr = 0; + } while (skb->next); + + delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); + th->check = ~csum_fold(th->check + delta); + if (skb->ip_summed != CHECKSUM_HW) + th->check = csum_fold(csum_partial(skb->h.raw, thlen, + skb->csum)); + +out: + return segs; +} +EXPORT_SYMBOL(tcp_tso_segment); + extern void __skb_cb_too_small_for_tcp(int, int); extern struct tcp_congestion_ops tcp_reno; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 035f2092d73a..b0134ab08379 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -12,7 +12,6 @@ * this behaves the same as the original Reno. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <net/tcp.h> @@ -198,12 +197,6 @@ static u32 bictcp_undo_cwnd(struct sock *sk) return max(tp->snd_cwnd, ca->last_max_cwnd); } -static u32 bictcp_min_cwnd(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - return tp->snd_ssthresh; -} - static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) @@ -231,7 +224,6 @@ static struct tcp_congestion_ops bictcp = { .cong_avoid = bictcp_cong_avoid, .set_state = bictcp_state, .undo_cwnd = bictcp_undo_cwnd, - .min_cwnd = bictcp_min_cwnd, .pkts_acked = bictcp_acked, .owner = THIS_MODULE, .name = "bic", diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 91c2f41c7f58..5765f9d03174 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -6,7 +6,6 @@ * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> @@ -38,7 +37,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) int ret = 0; /* all algorithms must implement ssthresh and cong_avoid ops */ - if (!ca->ssthresh || !ca->cong_avoid || !ca->min_cwnd) { + if (!ca->ssthresh || !ca->cong_avoid) { printk(KERN_ERR "TCP %s does not implement required ops\n", ca->name); return -EINVAL; @@ -251,8 +250,8 @@ u32 tcp_reno_ssthresh(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); -/* Lower bound on congestion window. */ -u32 tcp_reno_min_cwnd(struct sock *sk) +/* Lower bound on congestion window with halving. */ +u32 tcp_reno_min_cwnd(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh/2; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 31a4986dfbf7..2be27980ca78 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -12,7 +12,6 @@ * this behaves the same as the original Reno. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <net/tcp.h> @@ -325,11 +324,6 @@ static u32 bictcp_undo_cwnd(struct sock *sk) return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd); } -static u32 bictcp_min_cwnd(struct sock *sk) -{ - return tcp_sk(sk)->snd_ssthresh; -} - static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) @@ -357,7 +351,6 @@ static struct tcp_congestion_ops cubictcp = { .cong_avoid = bictcp_cong_avoid, .set_state = bictcp_state, .undo_cwnd = bictcp_undo_cwnd, - .min_cwnd = bictcp_min_cwnd, .pkts_acked = bictcp_acked, .owner = THIS_MODULE, .name = "cubic", diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index c148c1081880..57c5f0b10e6c 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -11,7 +11,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/inet_diag.h> @@ -26,7 +25,10 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, const struct tcp_sock *tp = tcp_sk(sk); struct tcp_info *info = _info; - r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; + if (sk->sk_state == TCP_LISTEN) + r->idiag_rqueue = sk->sk_ack_backlog; + else + r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; r->idiag_wqueue = tp->write_seq - tp->snd_una; if (info != NULL) tcp_get_info(sk, info); diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index ba7c63ca5bb1..fa3e1aad660c 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -6,7 +6,6 @@ * John Heffner <jheffner@psc.edu> */ -#include <linux/config.h> #include <linux/module.h> #include <net/tcp.h> @@ -98,6 +97,10 @@ struct hstcp { u32 ai; }; +static int max_ssthresh = 100; +module_param(max_ssthresh, int, 0644); +MODULE_PARM_DESC(max_ssthresh, "limited slow start threshold (RFC3742)"); + static void hstcp_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -119,17 +122,36 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp); - else { - /* Update AIMD parameters */ + if (tp->snd_cwnd <= tp->snd_ssthresh) { + /* RFC3742: limited slow start + * the window is increased by 1/K MSS for each arriving ACK, + * for K = int(cwnd/(0.5 max_ssthresh)) + */ + if (max_ssthresh > 0 && tp->snd_cwnd > max_ssthresh) { + u32 k = max(tp->snd_cwnd / (max_ssthresh >> 1), 1U); + if (++tp->snd_cwnd_cnt >= k) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + tp->snd_cwnd_cnt = 0; + } + } else { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } + } else { + /* Update AIMD parameters. + * + * We want to guarantee that: + * hstcp_aimd_vals[ca->ai-1].cwnd < + * snd_cwnd <= + * hstcp_aimd_vals[ca->ai].cwnd + */ if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) { while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && ca->ai < HSTCP_AIMD_MAX - 1) ca->ai++; - } else if (tp->snd_cwnd < hstcp_aimd_vals[ca->ai].cwnd) { - while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && - ca->ai > 0) + } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) { + while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) ca->ai--; } diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 1b2ff53f98ed..6edfe5e4510e 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -6,7 +6,6 @@ * http://www.hamilton.ie/net/htcp3.pdf */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <net/tcp.h> @@ -246,14 +245,6 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, } } -/* Lower bound on congestion window. */ -static u32 htcp_min_cwnd(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - return tp->snd_ssthresh; -} - - static void htcp_init(struct sock *sk) { struct htcp *ca = inet_csk_ca(sk); @@ -285,7 +276,6 @@ static void htcp_state(struct sock *sk, u8 new_state) static struct tcp_congestion_ops htcp = { .init = htcp_init, .ssthresh = htcp_recalc_ssthresh, - .min_cwnd = htcp_min_cwnd, .cong_avoid = htcp_cong_avoid, .set_state = htcp_state, .undo_cwnd = htcp_cwnd_undo, diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 40dbb3877510..7406e0c5fb8e 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -10,7 +10,6 @@ * root at danielinux.net */ -#include <linux/config.h> #include <linux/module.h> #include <net/tcp.h> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4a538bc1683d..738dad9f7d49 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -63,7 +63,6 @@ * Pasi Sarolahti: F-RTO for dealing with spurious RTOs */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/sysctl.h> @@ -71,6 +70,7 @@ #include <net/inet_common.h> #include <linux/ipsec.h> #include <asm/unaligned.h> +#include <net/netdma.h> int sysctl_tcp_timestamps = 1; int sysctl_tcp_window_scaling = 1; @@ -1072,7 +1072,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ else pkt_len = (end_seq - TCP_SKB_CB(skb)->seq); - if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size)) + if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size)) break; pcount = tcp_skb_pcount(skb); } @@ -1649,7 +1649,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) * Hence, we can detect timed out packets during fast * retransmit without falling to slow start. */ - if (tcp_head_timedout(sk, tp)) { + if (!IsReno(tp) && tcp_head_timedout(sk, tp)) { struct sk_buff *skb; skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint @@ -1662,8 +1662,6 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - if (IsReno(tp)) - tcp_remove_reno_sacks(sk, tp, tcp_skb_pcount(skb) + 1); /* clear xmit_retrans hint */ if (tp->retransmit_skb_hint && @@ -1690,17 +1688,26 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) tp->snd_cwnd_stamp = tcp_time_stamp; } +/* Lower bound on congestion window is slow start threshold + * unless congestion avoidance choice decides to overide it. + */ +static inline u32 tcp_cwnd_min(const struct sock *sk) +{ + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + + return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh; +} + /* Decrease cwnd each second ack. */ static void tcp_cwnd_down(struct sock *sk) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int decr = tp->snd_cwnd_cnt + 1; tp->snd_cwnd_cnt = decr&1; decr >>= 1; - if (decr && tp->snd_cwnd > icsk->icsk_ca_ops->min_cwnd(sk)) + if (decr && tp->snd_cwnd > tcp_cwnd_min(sk)) tp->snd_cwnd -= decr; tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); @@ -3787,6 +3794,50 @@ static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *sk __tcp_checksum_complete_user(sk, skb); } +#ifdef CONFIG_NET_DMA +static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen) +{ + struct tcp_sock *tp = tcp_sk(sk); + int chunk = skb->len - hlen; + int dma_cookie; + int copied_early = 0; + + if (tp->ucopy.wakeup) + return 0; + + if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) + tp->ucopy.dma_chan = get_softnet_dma(); + + if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) { + + dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan, + skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list); + + if (dma_cookie < 0) + goto out; + + tp->ucopy.dma_cookie = dma_cookie; + copied_early = 1; + + tp->ucopy.len -= chunk; + tp->copied_seq += chunk; + tcp_rcv_space_adjust(sk); + + if ((tp->ucopy.len == 0) || + (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) || + (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) { + tp->ucopy.wakeup = 1; + sk->sk_data_ready(sk, 0); + } + } else if (chunk > 0) { + tp->ucopy.wakeup = 1; + sk->sk_data_ready(sk, 0); + } +out: + return copied_early; +} +#endif /* CONFIG_NET_DMA */ + /* * TCP receive function for the ESTABLISHED state. * @@ -3888,8 +3939,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(sk, skb); - /* We know that such packets are checksummed * on entry. */ @@ -3903,14 +3952,23 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, } } else { int eaten = 0; + int copied_early = 0; - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && - len - tcp_header_len <= tp->ucopy.len && - sock_owned_by_user(sk)) { - __set_current_state(TASK_RUNNING); + if (tp->copied_seq == tp->rcv_nxt && + len - tcp_header_len <= tp->ucopy.len) { +#ifdef CONFIG_NET_DMA + if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { + copied_early = 1; + eaten = 1; + } +#endif + if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) { + __set_current_state(TASK_RUNNING); - if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { + if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) + eaten = 1; + } + if (eaten) { /* Predicted packet is in window by definition. * seq == rcv_nxt and rcv_wup <= rcv_nxt. * Hence, check seq<=rcv_wup reduces to: @@ -3926,8 +3984,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, __skb_pull(skb, tcp_header_len); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER); - eaten = 1; } + if (copied_early) + tcp_cleanup_rbuf(sk, skb->len); } if (!eaten) { if (tcp_checksum_complete_user(sk, skb)) @@ -3968,6 +4027,11 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, __tcp_ack_snd_check(sk, 0); no_ack: +#ifdef CONFIG_NET_DMA + if (copied_early) + __skb_queue_tail(&sk->sk_async_wait_queue, skb); + else +#endif if (eaten) __kfree_skb(skb); else @@ -4113,8 +4177,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, */ TCP_ECN_rcv_synack(tp, th); - if (tp->ecn_flags&TCP_ECN_OK) - sock_set_flag(sk, SOCK_NO_LARGESEND); tp->snd_wl1 = TCP_SKB_CB(skb)->seq; tcp_ack(sk, skb, FLAG_SLOWPATH); @@ -4257,8 +4319,6 @@ discard: tp->max_window = tp->snd_wnd; TCP_ECN_rcv_syn(tp, th); - if (tp->ecn_flags&TCP_ECN_OK) - sock_set_flag(sk, SOCK_NO_LARGESEND); tcp_mtup_init(sk); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 672950e54c49..a891133f00e4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -52,7 +52,6 @@ * a single port at the same time. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/fcntl.h> @@ -71,6 +70,7 @@ #include <net/inet_common.h> #include <net/timewait_sock.h> #include <net/xfrm.h> +#include <net/netdma.h> #include <linux/inet.h> #include <linux/ipv6.h> @@ -90,7 +90,7 @@ static struct socket *tcp_socket; void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { - .lhash_lock = RW_LOCK_UNLOCKED, + .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), .lhash_users = ATOMIC_INIT(0), .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), }; @@ -241,6 +241,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; /* OK, now commit destination to socket. */ + sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->u.dst); if (!tp->write_seq) @@ -495,6 +496,24 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) } } +int tcp_v4_gso_send_check(struct sk_buff *skb) +{ + struct iphdr *iph; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + iph = skb->nh.iph; + th = skb->h.th; + + th->check = 0; + th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); + skb->csum = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_HW; + return 0; +} + /* * This routine will send an RST to the other tcp. * @@ -883,6 +902,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (!newsk) goto exit; + newsk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(newsk, dst); newtp = tcp_sk(newsk); @@ -1088,11 +1108,21 @@ process: skb->dev = NULL; - bh_lock_sock(sk); + bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { - if (!tcp_prequeue(sk, skb)) +#ifdef CONFIG_NET_DMA + struct tcp_sock *tp = tcp_sk(sk); + if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) + tp->ucopy.dma_chan = get_softnet_dma(); + if (tp->ucopy.dma_chan) + ret = tcp_v4_do_rcv(sk, skb); + else +#endif + { + if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); + } } else sk_add_backlog(sk, skb); bh_unlock_sock(sk); @@ -1296,6 +1326,11 @@ int tcp_v4_destroy_sock(struct sock *sk) /* Cleans up our, hopefully empty, out_of_order_queue. */ __skb_queue_purge(&tp->out_of_order_queue); +#ifdef CONFIG_NET_DMA + /* Cleans up our sk_async_wait_queue */ + __skb_queue_purge(&sk->sk_async_wait_queue); +#endif + /* Clean prequeue, it must be empty really */ __skb_queue_purge(&tp->ucopy.prequeue); @@ -1710,7 +1745,8 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " "%08X %5d %8d %lu %d %p %u %u %u %u %d", i, src, srcp, dest, destp, sp->sk_state, - tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, + tp->write_seq - tp->snd_una, + (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c new file mode 100644 index 000000000000..1f977b6ee9a1 --- /dev/null +++ b/net/ipv4/tcp_lp.c @@ -0,0 +1,338 @@ +/* + * TCP Low Priority (TCP-LP) + * + * TCP Low Priority is a distributed algorithm whose goal is to utilize only + * the excess network bandwidth as compared to the ``fair share`` of + * bandwidth as targeted by TCP. Available from: + * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf + * + * Original Author: + * Aleksandar Kuzmanovic <akuzma@northwestern.edu> + * + * See http://www-ece.rice.edu/networks/TCP-LP/ for their implementation. + * As of 2.6.13, Linux supports pluggable congestion control algorithms. + * Due to the limitation of the API, we take the following changes from + * the original TCP-LP implementation: + * o We use newReno in most core CA handling. Only add some checking + * within cong_avoid. + * o Error correcting in remote HZ, therefore remote HZ will be keeped + * on checking and updating. + * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, sicne + * OWD have a similar meaning as RTT. Also correct the buggy formular. + * o Handle reaction for Early Congestion Indication (ECI) within + * pkts_acked, as mentioned within pseudo code. + * o OWD is handled in relative format, where local time stamp will in + * tcp_time_stamp format. + * + * Port from 2.4.19 to 2.6.16 as module by: + * Wong Hoi Sing Edison <hswong3i@gmail.com> + * Hung Hing Lun <hlhung3i@gmail.com> + * + * Version: $Id: tcp_lp.c,v 1.22 2006-05-02 18:18:19 hswong3i Exp $ + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <net/tcp.h> + +/* resolution of owd */ +#define LP_RESOL 1000 + +/** + * enum tcp_lp_state + * @LP_VALID_RHZ: is remote HZ valid? + * @LP_VALID_OWD: is OWD valid? + * @LP_WITHIN_THR: are we within threshold? + * @LP_WITHIN_INF: are we within inference? + * + * TCP-LP's state flags. + * We create this set of state flag mainly for debugging. + */ +enum tcp_lp_state { + LP_VALID_RHZ = (1 << 0), + LP_VALID_OWD = (1 << 1), + LP_WITHIN_THR = (1 << 3), + LP_WITHIN_INF = (1 << 4), +}; + +/** + * struct lp + * @flag: TCP-LP state flag + * @sowd: smoothed OWD << 3 + * @owd_min: min OWD + * @owd_max: max OWD + * @owd_max_rsv: resrved max owd + * @remote_hz: estimated remote HZ + * @remote_ref_time: remote reference time + * @local_ref_time: local reference time + * @last_drop: time for last active drop + * @inference: current inference + * + * TCP-LP's private struct. + * We get the idea from original TCP-LP implementation where only left those we + * found are really useful. + */ +struct lp { + u32 flag; + u32 sowd; + u32 owd_min; + u32 owd_max; + u32 owd_max_rsv; + u32 remote_hz; + u32 remote_ref_time; + u32 local_ref_time; + u32 last_drop; + u32 inference; +}; + +/** + * tcp_lp_init + * + * Init all required variables. + * Clone the handling from Vegas module implementation. + */ +static void tcp_lp_init(struct sock *sk) +{ + struct lp *lp = inet_csk_ca(sk); + + lp->flag = 0; + lp->sowd = 0; + lp->owd_min = 0xffffffff; + lp->owd_max = 0; + lp->owd_max_rsv = 0; + lp->remote_hz = 0; + lp->remote_ref_time = 0; + lp->local_ref_time = 0; + lp->last_drop = 0; + lp->inference = 0; +} + +/** + * tcp_lp_cong_avoid + * + * Implementation of cong_avoid. + * Will only call newReno CA when away from inference. + * From TCP-LP's paper, this will be handled in additive increasement. + */ +static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, + int flag) +{ + struct lp *lp = inet_csk_ca(sk); + + if (!(lp->flag & LP_WITHIN_INF)) + tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); +} + +/** + * tcp_lp_remote_hz_estimator + * + * Estimate remote HZ. + * We keep on updating the estimated value, where original TCP-LP + * implementation only guest it for once and use forever. + */ +static u32 tcp_lp_remote_hz_estimator(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct lp *lp = inet_csk_ca(sk); + s64 rhz = lp->remote_hz << 6; /* remote HZ << 6 */ + s64 m = 0; + + /* not yet record reference time + * go away!! record it before come back!! */ + if (lp->remote_ref_time == 0 || lp->local_ref_time == 0) + goto out; + + /* we can't calc remote HZ with no different!! */ + if (tp->rx_opt.rcv_tsval == lp->remote_ref_time + || tp->rx_opt.rcv_tsecr == lp->local_ref_time) + goto out; + + m = HZ * (tp->rx_opt.rcv_tsval - + lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr - + lp->local_ref_time); + if (m < 0) + m = -m; + + if (rhz != 0) { + m -= rhz >> 6; /* m is now error in remote HZ est */ + rhz += m; /* 63/64 old + 1/64 new */ + } else + rhz = m << 6; + + /* record time for successful remote HZ calc */ + lp->flag |= LP_VALID_RHZ; + + out: + /* record reference time stamp */ + lp->remote_ref_time = tp->rx_opt.rcv_tsval; + lp->local_ref_time = tp->rx_opt.rcv_tsecr; + + return rhz >> 6; +} + +/** + * tcp_lp_owd_calculator + * + * Calculate one way delay (in relative format). + * Original implement OWD as minus of remote time difference to local time + * difference directly. As this time difference just simply equal to RTT, when + * the network status is stable, remote RTT will equal to local RTT, and result + * OWD into zero. + * It seems to be a bug and so we fixed it. + */ +static u32 tcp_lp_owd_calculator(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct lp *lp = inet_csk_ca(sk); + s64 owd = 0; + + lp->remote_hz = tcp_lp_remote_hz_estimator(sk); + + if (lp->flag & LP_VALID_RHZ) { + owd = + tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) - + tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ); + if (owd < 0) + owd = -owd; + } + + if (owd > 0) + lp->flag |= LP_VALID_OWD; + else + lp->flag &= ~LP_VALID_OWD; + + return owd; +} + +/** + * tcp_lp_rtt_sample + * + * Implementation or rtt_sample. + * Will take the following action, + * 1. calc OWD, + * 2. record the min/max OWD, + * 3. calc smoothed OWD (SOWD). + * Most ideas come from the original TCP-LP implementation. + */ +static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt) +{ + struct lp *lp = inet_csk_ca(sk); + s64 mowd = tcp_lp_owd_calculator(sk); + + /* sorry that we don't have valid data */ + if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD)) + return; + + /* record the next min owd */ + if (mowd < lp->owd_min) + lp->owd_min = mowd; + + /* always forget the max of the max + * we just set owd_max as one below it */ + if (mowd > lp->owd_max) { + if (mowd > lp->owd_max_rsv) { + if (lp->owd_max_rsv == 0) + lp->owd_max = mowd; + else + lp->owd_max = lp->owd_max_rsv; + lp->owd_max_rsv = mowd; + } else + lp->owd_max = mowd; + } + + /* calc for smoothed owd */ + if (lp->sowd != 0) { + mowd -= lp->sowd >> 3; /* m is now error in owd est */ + lp->sowd += mowd; /* owd = 7/8 owd + 1/8 new */ + } else + lp->sowd = mowd << 3; /* take the measured time be owd */ +} + +/** + * tcp_lp_pkts_acked + * + * Implementation of pkts_acked. + * Deal with active drop under Early Congestion Indication. + * Only drop to half and 1 will be handle, because we hope to use back + * newReno in increase case. + * We work it out by following the idea from TCP-LP's paper directly + */ +static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct lp *lp = inet_csk_ca(sk); + + /* calc inference */ + if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) + lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); + + /* test if within inference */ + if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference)) + lp->flag |= LP_WITHIN_INF; + else + lp->flag &= ~LP_WITHIN_INF; + + /* test if within threshold */ + if (lp->sowd >> 3 < + lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100) + lp->flag |= LP_WITHIN_THR; + else + lp->flag &= ~LP_WITHIN_THR; + + pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag, + tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max, + lp->sowd >> 3); + + if (lp->flag & LP_WITHIN_THR) + return; + + /* FIXME: try to reset owd_min and owd_max here + * so decrease the chance the min/max is no longer suitable + * and will usually within threshold when whithin inference */ + lp->owd_min = lp->sowd >> 3; + lp->owd_max = lp->sowd >> 2; + lp->owd_max_rsv = lp->sowd >> 2; + + /* happened within inference + * drop snd_cwnd into 1 */ + if (lp->flag & LP_WITHIN_INF) + tp->snd_cwnd = 1U; + + /* happened after inference + * cut snd_cwnd into half */ + else + tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U); + + /* record this drop time */ + lp->last_drop = tcp_time_stamp; +} + +static struct tcp_congestion_ops tcp_lp = { + .init = tcp_lp_init, + .ssthresh = tcp_reno_ssthresh, + .cong_avoid = tcp_lp_cong_avoid, + .min_cwnd = tcp_reno_min_cwnd, + .rtt_sample = tcp_lp_rtt_sample, + .pkts_acked = tcp_lp_pkts_acked, + + .owner = THIS_MODULE, + .name = "lp" +}; + +static int __init tcp_lp_register(void) +{ + BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); + return tcp_register_congestion_control(&tcp_lp); +} + +static void __exit tcp_lp_unregister(void) +{ + tcp_unregister_congestion_control(&tcp_lp); +} + +module_init(tcp_lp_register); +module_exit(tcp_lp_unregister); + +MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("TCP Low Priority"); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2b9b7f6c7f7c..0ccb7cb22b15 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -20,7 +20,6 @@ * Jorge Cwik, <jorge@laser.satlink.net> */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/sysctl.h> @@ -41,7 +40,7 @@ int sysctl_tcp_abort_on_overflow; struct inet_timewait_death_row tcp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, - .death_lock = SPIN_LOCK_UNLOCKED, + .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock), .hashinfo = &tcp_hashinfo, .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, (unsigned long)&tcp_death_row), @@ -440,8 +439,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); - if (newtp->ecn_flags&TCP_ECN_OK) - sock_set_flag(newsk, SOCK_NO_LARGESEND); TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 743016baa048..5c08ea20a18d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,6 +59,9 @@ int sysctl_tcp_tso_win_divisor = 3; int sysctl_tcp_mtu_probing = 0; int sysctl_tcp_base_mss = 512; +/* By default, RFC2861 behavior. */ +int sysctl_tcp_slow_start_after_idle = 1; + static void update_send_head(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { @@ -138,7 +141,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp, struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; - if (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto) + if (sysctl_tcp_slow_start_after_idle && + (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)) tcp_cwnd_restart(sk, __sk_dst_get(sk)); tp->lsndtime = now; @@ -506,20 +510,21 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || - !(sk->sk_route_caps & NETIF_F_TSO)) { + if (skb->len <= mss_now || !sk_can_gso(sk)) { /* Avoid the costly divide in the normal * non-TSO case. */ - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; } else { unsigned int factor; factor = skb->len + (mss_now - 1); factor /= mss_now; - skb_shinfo(skb)->tso_segs = factor; - skb_shinfo(skb)->tso_size = mss_now; + skb_shinfo(skb)->gso_segs = factor; + skb_shinfo(skb)->gso_size = mss_now; + skb_shinfo(skb)->gso_type = sk->sk_gso_type; } } @@ -642,7 +647,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss * eventually). The difference is that pulled data not copied, but * immediately discarded. */ -static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len) +static void __pskb_trim_head(struct sk_buff *skb, int len) { int i, k, eat; @@ -667,7 +672,6 @@ static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len) skb->tail = skb->data; skb->data_len -= len; skb->len = skb->data_len; - return skb->tail; } int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) @@ -676,12 +680,11 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return -ENOMEM; - if (len <= skb_headlen(skb)) { + /* If len == headlen, we avoid __skb_pull to preserve alignment. */ + if (unlikely(len < skb_headlen(skb))) __skb_pull(skb, len); - } else { - if (__pskb_trim_head(skb, len-skb_headlen(skb)) == NULL) - return -ENOMEM; - } + else + __pskb_trim_head(skb, len - skb_headlen(skb)); TCP_SKB_CB(skb)->seq += len; skb->ip_summed = CHECKSUM_HW; @@ -820,9 +823,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tp->mss_cache; - if (large_allowed && - (sk->sk_route_caps & NETIF_F_TSO) && - !tp->urg_mode) + if (large_allowed && sk_can_gso(sk) && !tp->urg_mode) doing_tso = 1; if (dst) { @@ -912,7 +913,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int if (!tso_segs || (tso_segs > 1 && - skb_shinfo(skb)->tso_size != mss_now)) { + tcp_skb_mss(skb) != mss_now)) { tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } @@ -1722,8 +1723,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { if (!pskb_trim(skb, 0)) { TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; skb->ip_summed = CHECKSUM_NONE; skb->csum = 0; } @@ -1928,8 +1930,9 @@ void tcp_send_fin(struct sock *sk) skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); TCP_SKB_CB(skb)->sacked = 0; - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ TCP_SKB_CB(skb)->seq = tp->write_seq; @@ -1961,8 +1964,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); TCP_SKB_CB(skb)->sacked = 0; - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; /* Send it off. */ TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); @@ -2037,16 +2041,15 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; - if (dst->dev->features&NETIF_F_TSO) - ireq->ecn_ok = 0; TCP_ECN_make_synack(req, th); th->source = inet_sk(sk)->sport; th->dest = ireq->rmt_port; TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; TCP_SKB_CB(skb)->sacked = 0; - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ @@ -2150,8 +2153,9 @@ int tcp_connect(struct sock *sk) TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; TCP_ECN_send_syn(sk, tp, buff); TCP_SKB_CB(buff)->sacked = 0; - skb_shinfo(buff)->tso_segs = 1; - skb_shinfo(buff)->tso_size = 0; + skb_shinfo(buff)->gso_segs = 1; + skb_shinfo(buff)->gso_size = 0; + skb_shinfo(buff)->gso_type = 0; buff->csum = 0; TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; @@ -2255,8 +2259,9 @@ void tcp_send_ack(struct sock *sk) buff->csum = 0; TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(buff)->sacked = 0; - skb_shinfo(buff)->tso_segs = 1; - skb_shinfo(buff)->tso_size = 0; + skb_shinfo(buff)->gso_segs = 1; + skb_shinfo(buff)->gso_size = 0; + skb_shinfo(buff)->gso_type = 0; /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); @@ -2291,8 +2296,9 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) skb->csum = 0; TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(skb)->sacked = urgent; - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; /* Use a previous sequence. This should cause the other * end to send an ack. Don't queue or clone SKB, just diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c new file mode 100644 index 000000000000..d7d517a3a238 --- /dev/null +++ b/net/ipv4/tcp_probe.c @@ -0,0 +1,181 @@ +/* + * tcpprobe - Observe the TCP flow with kprobes. + * + * The idea for this came from Werner Almesberger's umlsim + * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <linux/proc_fs.h> +#include <linux/module.h> +#include <linux/kfifo.h> +#include <linux/vmalloc.h> + +#include <net/tcp.h> + +MODULE_AUTHOR("Stephen Hemminger <shemminger@osdl.org>"); +MODULE_DESCRIPTION("TCP cwnd snooper"); +MODULE_LICENSE("GPL"); + +static int port = 0; +MODULE_PARM_DESC(port, "Port to match (0=all)"); +module_param(port, int, 0); + +static int bufsize = 64*1024; +MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +module_param(bufsize, int, 0); + +static const char procname[] = "tcpprobe"; + +struct { + struct kfifo *fifo; + spinlock_t lock; + wait_queue_head_t wait; + struct timeval tstart; +} tcpw; + +static void printl(const char *fmt, ...) +{ + va_list args; + int len; + struct timeval now; + char tbuf[256]; + + va_start(args, fmt); + do_gettimeofday(&now); + + now.tv_sec -= tcpw.tstart.tv_sec; + now.tv_usec -= tcpw.tstart.tv_usec; + if (now.tv_usec < 0) { + --now.tv_sec; + now.tv_usec += 1000000; + } + + len = sprintf(tbuf, "%lu.%06lu ", + (unsigned long) now.tv_sec, + (unsigned long) now.tv_usec); + len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); + va_end(args); + + kfifo_put(tcpw.fifo, tbuf, len); + wake_up(&tcpw.wait); +} + +static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_sock *inet = inet_sk(sk); + + if (port == 0 || ntohs(inet->dport) == port || + ntohs(inet->sport) == port) { + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), + size, tp->snd_nxt, tp->snd_una, + tp->snd_cwnd, tcp_current_ssthresh(sk), + tp->snd_wnd); + } + + jprobe_return(); + return 0; +} + +static struct jprobe tcp_send_probe = { + .kp = { .addr = (kprobe_opcode_t *) &tcp_sendmsg, }, + .entry = (kprobe_opcode_t *) &jtcp_sendmsg, +}; + + +static int tcpprobe_open(struct inode * inode, struct file * file) +{ + kfifo_reset(tcpw.fifo); + do_gettimeofday(&tcpw.tstart); + return 0; +} + +static ssize_t tcpprobe_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + int error = 0, cnt; + unsigned char *tbuf; + + if (!buf || len < 0) + return -EINVAL; + + if (len == 0) + return 0; + + tbuf = vmalloc(len); + if (!tbuf) + return -ENOMEM; + + error = wait_event_interruptible(tcpw.wait, + __kfifo_len(tcpw.fifo) != 0); + if (error) + return error; + + cnt = kfifo_get(tcpw.fifo, tbuf, len); + error = copy_to_user(buf, tbuf, cnt); + + vfree(tbuf); + + return error ? error : cnt; +} + +static struct file_operations tcpprobe_fops = { + .owner = THIS_MODULE, + .open = tcpprobe_open, + .read = tcpprobe_read, +}; + +static __init int tcpprobe_init(void) +{ + int ret = -ENOMEM; + + init_waitqueue_head(&tcpw.wait); + spin_lock_init(&tcpw.lock); + tcpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &tcpw.lock); + + if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) + goto err0; + + ret = register_jprobe(&tcp_send_probe); + if (ret) + goto err1; + + pr_info("TCP watch registered (port=%d)\n", port); + return 0; + err1: + proc_net_remove(procname); + err0: + kfifo_free(tcpw.fifo); + return ret; +} +module_init(tcpprobe_init); + +static __exit void tcpprobe_exit(void) +{ + kfifo_free(tcpw.fifo); + proc_net_remove(procname); + unregister_jprobe(&tcp_send_probe); + +} +module_exit(tcpprobe_exit); diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 26d7486ee501..4624501e9680 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -5,7 +5,6 @@ * John Heffner <jheffner@sc.edu> */ -#include <linux/config.h> #include <linux/module.h> #include <net/tcp.h> diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 3b7403495052..490360b5b4bf 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -31,7 +31,6 @@ * assumed senders never went idle. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/skbuff.h> diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c new file mode 100644 index 000000000000..11b42a7135c1 --- /dev/null +++ b/net/ipv4/tcp_veno.c @@ -0,0 +1,231 @@ +/* + * TCP Veno congestion control + * + * This is based on the congestion detection/avoidance scheme described in + * C. P. Fu, S. C. Liew. + * "TCP Veno: TCP Enhancement for Transmission over Wireless Access Networks." + * IEEE Journal on Selected Areas in Communication, + * Feb. 2003. + * See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/inet_diag.h> + +#include <net/tcp.h> + +/* Default values of the Veno variables, in fixed-point representation + * with V_PARAM_SHIFT bits to the right of the binary point. + */ +#define V_PARAM_SHIFT 1 +static const int beta = 3 << V_PARAM_SHIFT; + +/* Veno variables */ +struct veno { + u8 doing_veno_now; /* if true, do veno for this rtt */ + u16 cntrtt; /* # of rtts measured within last rtt */ + u32 minrtt; /* min of rtts measured within last rtt (in usec) */ + u32 basertt; /* the min of all Veno rtt measurements seen (in usec) */ + u32 inc; /* decide whether to increase cwnd */ + u32 diff; /* calculate the diff rate */ +}; + +/* There are several situations when we must "re-start" Veno: + * + * o when a connection is established + * o after an RTO + * o after fast recovery + * o when we send a packet and there is no outstanding + * unacknowledged data (restarting an idle connection) + * + */ +static inline void veno_enable(struct sock *sk) +{ + struct veno *veno = inet_csk_ca(sk); + + /* turn on Veno */ + veno->doing_veno_now = 1; + + veno->minrtt = 0x7fffffff; +} + +static inline void veno_disable(struct sock *sk) +{ + struct veno *veno = inet_csk_ca(sk); + + /* turn off Veno */ + veno->doing_veno_now = 0; +} + +static void tcp_veno_init(struct sock *sk) +{ + struct veno *veno = inet_csk_ca(sk); + + veno->basertt = 0x7fffffff; + veno->inc = 1; + veno_enable(sk); +} + +/* Do rtt sampling needed for Veno. */ +static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt) +{ + struct veno *veno = inet_csk_ca(sk); + u32 vrtt = usrtt + 1; /* Never allow zero rtt or basertt */ + + /* Filter to find propagation delay: */ + if (vrtt < veno->basertt) + veno->basertt = vrtt; + + /* Find the min rtt during the last rtt to find + * the current prop. delay + queuing delay: + */ + veno->minrtt = min(veno->minrtt, vrtt); + veno->cntrtt++; +} + +static void tcp_veno_state(struct sock *sk, u8 ca_state) +{ + if (ca_state == TCP_CA_Open) + veno_enable(sk); + else + veno_disable(sk); +} + +/* + * If the connection is idle and we are restarting, + * then we don't want to do any Veno calculations + * until we get fresh rtt samples. So when we + * restart, we reset our Veno state to a clean + * state. After we get acks for this flight of + * packets, _then_ we can make Veno calculations + * again. + */ +static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) +{ + if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) + tcp_veno_init(sk); +} + +static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, + u32 seq_rtt, u32 in_flight, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct veno *veno = inet_csk_ca(sk); + + if (!veno->doing_veno_now) + return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + + /* limited by applications */ + if (!tcp_is_cwnd_limited(sk, in_flight)) + return; + + /* We do the Veno calculations only if we got enough rtt samples */ + if (veno->cntrtt <= 2) { + /* We don't have enough rtt samples to do the Veno + * calculation, so we'll behave like Reno. + */ + tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + } else { + u32 rtt, target_cwnd; + + /* We have enough rtt samples, so, using the Veno + * algorithm, we determine the state of the network. + */ + + rtt = veno->minrtt; + + target_cwnd = ((tp->snd_cwnd * veno->basertt) + << V_PARAM_SHIFT) / rtt; + + veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd; + + if (tp->snd_cwnd <= tp->snd_ssthresh) { + /* Slow start. */ + tcp_slow_start(tp); + } else { + /* Congestion avoidance. */ + if (veno->diff < beta) { + /* In the "non-congestive state", increase cwnd + * every rtt. + */ + if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + tp->snd_cwnd_cnt = 0; + } else + tp->snd_cwnd_cnt++; + } else { + /* In the "congestive state", increase cwnd + * every other rtt. + */ + if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (veno->inc + && tp->snd_cwnd < + tp->snd_cwnd_clamp) { + tp->snd_cwnd++; + veno->inc = 0; + } else + veno->inc = 1; + tp->snd_cwnd_cnt = 0; + } else + tp->snd_cwnd_cnt++; + } + + } + if (tp->snd_cwnd < 2) + tp->snd_cwnd = 2; + else if (tp->snd_cwnd > tp->snd_cwnd_clamp) + tp->snd_cwnd = tp->snd_cwnd_clamp; + } + /* Wipe the slate clean for the next rtt. */ + /* veno->cntrtt = 0; */ + veno->minrtt = 0x7fffffff; +} + +/* Veno MD phase */ +static u32 tcp_veno_ssthresh(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct veno *veno = inet_csk_ca(sk); + + if (veno->diff < beta) + /* in "non-congestive state", cut cwnd by 1/5 */ + return max(tp->snd_cwnd * 4 / 5, 2U); + else + /* in "congestive state", cut cwnd by 1/2 */ + return max(tp->snd_cwnd >> 1U, 2U); +} + +static struct tcp_congestion_ops tcp_veno = { + .init = tcp_veno_init, + .ssthresh = tcp_veno_ssthresh, + .cong_avoid = tcp_veno_cong_avoid, + .rtt_sample = tcp_veno_rtt_calc, + .set_state = tcp_veno_state, + .cwnd_event = tcp_veno_cwnd_event, + + .owner = THIS_MODULE, + .name = "veno", +}; + +static int __init tcp_veno_register(void) +{ + BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); + tcp_register_congestion_control(&tcp_veno); + return 0; +} + +static void __exit tcp_veno_unregister(void) +{ + tcp_unregister_congestion_control(&tcp_veno); +} + +module_init(tcp_veno_register); +module_exit(tcp_veno_unregister); + +MODULE_AUTHOR("Bin Zhou, Cheng Peng Fu"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("TCP Veno"); diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 0c340c3756c2..5446312ffd2a 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -1,10 +1,26 @@ /* - * TCP Westwood+ + * TCP Westwood+: end-to-end bandwidth estimation for TCP * - * Angelo Dell'Aera: TCP Westwood+ support + * Angelo Dell'Aera: author of the first version of TCP Westwood+ in Linux 2.4 + * + * Support at http://c3lab.poliba.it/index.php/Westwood + * Main references in literature: + * + * - Mascolo S, Casetti, M. Gerla et al. + * "TCP Westwood: bandwidth estimation for TCP" Proc. ACM Mobicom 2001 + * + * - A. Grieco, s. Mascolo + * "Performance evaluation of New Reno, Vegas, Westwood+ TCP" ACM Computer + * Comm. Review, 2004 + * + * - A. Dell'Aera, L. Grieco, S. Mascolo. + * "Linux 2.4 Implementation of Westwood+ TCP with Rate-Halving : + * A Performance Evaluation Over the Internet" (ICC 2004), Paris, June 2004 + * + * Westwood+ employs end-to-end bandwidth measurement to set cwnd and + * ssthresh after packet loss. The probing phase is as the original Reno. */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/skbuff.h> @@ -22,6 +38,8 @@ struct westwood { u32 accounted; u32 rtt; u32 rtt_min; /* minimum observed RTT */ + u8 first_ack; /* flag which infers that this is the first ack */ + u8 reset_rtt_min; /* Reset RTT min to next RTT sample*/ }; @@ -49,9 +67,11 @@ static void tcp_westwood_init(struct sock *sk) w->bw_est = 0; w->accounted = 0; w->cumul_ack = 0; + w->reset_rtt_min = 1; w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; w->rtt_win_sx = tcp_time_stamp; w->snd_una = tcp_sk(sk)->snd_una; + w->first_ack = 1; } /* @@ -63,10 +83,16 @@ static inline u32 westwood_do_filter(u32 a, u32 b) return (((7 * a) + b) >> 3); } -static inline void westwood_filter(struct westwood *w, u32 delta) +static void westwood_filter(struct westwood *w, u32 delta) { - w->bw_ns_est = westwood_do_filter(w->bw_ns_est, w->bk / delta); - w->bw_est = westwood_do_filter(w->bw_est, w->bw_ns_est); + /* If the filter is empty fill it with the first sample of bandwidth */ + if (w->bw_ns_est == 0 && w->bw_est == 0) { + w->bw_ns_est = w->bk / delta; + w->bw_est = w->bw_ns_est; + } else { + w->bw_ns_est = westwood_do_filter(w->bw_ns_est, w->bk / delta); + w->bw_est = westwood_do_filter(w->bw_est, w->bw_ns_est); + } } /* @@ -91,6 +117,15 @@ static void westwood_update_window(struct sock *sk) struct westwood *w = inet_csk_ca(sk); s32 delta = tcp_time_stamp - w->rtt_win_sx; + /* Initialize w->snd_una with the first acked sequence number in order + * to fix mismatch between tp->snd_una and w->snd_una for the first + * bandwidth sample + */ + if (w->first_ack) { + w->snd_una = tcp_sk(sk)->snd_una; + w->first_ack = 0; + } + /* * See if a RTT-window has passed. * Be careful since if RTT is less than @@ -108,6 +143,16 @@ static void westwood_update_window(struct sock *sk) } } +static inline void update_rtt_min(struct westwood *w) +{ + if (w->reset_rtt_min) { + w->rtt_min = w->rtt; + w->reset_rtt_min = 0; + } else + w->rtt_min = min(w->rtt, w->rtt_min); +} + + /* * @westwood_fast_bw * It is called when we are in fast path. In particular it is called when @@ -123,7 +168,7 @@ static inline void westwood_fast_bw(struct sock *sk) w->bk += tp->snd_una - w->snd_una; w->snd_una = tp->snd_una; - w->rtt_min = min(w->rtt, w->rtt_min); + update_rtt_min(w); } /* @@ -162,12 +207,6 @@ static inline u32 westwood_acked_count(struct sock *sk) return w->cumul_ack; } -static inline u32 westwood_bw_rttmin(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - const struct westwood *w = inet_csk_ca(sk); - return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); -} /* * TCP Westwood @@ -175,9 +214,11 @@ static inline u32 westwood_bw_rttmin(const struct sock *sk) * in packets we use mss_cache). Rttmin is guaranteed to be >= 2 * so avoids ever returning 0. */ -static u32 tcp_westwood_cwnd_min(struct sock *sk) +static u32 tcp_westwood_bw_rttmin(const struct sock *sk) { - return westwood_bw_rttmin(sk); + const struct tcp_sock *tp = tcp_sk(sk); + const struct westwood *w = inet_csk_ca(sk); + return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); } static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) @@ -191,17 +232,19 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) break; case CA_EVENT_COMPLETE_CWR: - tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(sk); + tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); break; case CA_EVENT_FRTO: - tp->snd_ssthresh = westwood_bw_rttmin(sk); + tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); + /* Update RTT_min when next ack arrives */ + w->reset_rtt_min = 1; break; case CA_EVENT_SLOW_ACK: westwood_update_window(sk); w->bk += westwood_acked_count(sk); - w->rtt_min = min(w->rtt, w->rtt_min); + update_rtt_min(w); break; default: @@ -235,7 +278,7 @@ static struct tcp_congestion_ops tcp_westwood = { .init = tcp_westwood_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, - .min_cwnd = tcp_westwood_cwnd_min, + .min_cwnd = tcp_westwood_bw_rttmin, .cwnd_event = tcp_westwood_event, .get_info = tcp_westwood_info, .pkts_acked = tcp_westwood_pkts_acked, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3f93292b0ad8..9bfcddad695b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -91,7 +91,6 @@ #include <linux/errno.h> #include <linux/timer.h> #include <linux/mm.h> -#include <linux/config.h> #include <linux/inet.h> #include <linux/ipv6.h> #include <linux/netdevice.h> diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 3e174c83bfe7..817ed84511a6 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -13,7 +13,6 @@ #include <linux/string.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> -#include <net/inet_ecn.h> #include <net/ip.h> #include <net/xfrm.h> @@ -24,15 +23,6 @@ int xfrm4_rcv(struct sk_buff *skb) EXPORT_SYMBOL(xfrm4_rcv); -static inline void ipip_ecn_decapsulate(struct sk_buff *skb) -{ - struct iphdr *outer_iph = skb->nh.iph; - struct iphdr *inner_iph = skb->h.ipiph; - - if (INET_ECN_is_ce(outer_iph->tos)) - IP_ECN_set_ce(inner_iph); -} - static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) { switch (nexthdr) { @@ -113,24 +103,10 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) xfrm_vec[xfrm_nr++] = x; - iph = skb->nh.iph; + if (x->mode->input(x, skb)) + goto drop; if (x->props.mode) { - if (iph->protocol != IPPROTO_IPIP) - goto drop; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto drop; - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - goto drop; - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv4_copy_dscp(iph, skb->h.ipiph); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip_ecn_decapsulate(skb); - skb->mac.raw = memmove(skb->data - skb->mac_len, - skb->mac.raw, skb->mac_len); - skb->nh.raw = skb->data; - memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); decaps = 1; break; } diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c new file mode 100644 index 000000000000..a9e6b3dd19c9 --- /dev/null +++ b/net/ipv4/xfrm4_mode_transport.c @@ -0,0 +1,83 @@ +/* + * xfrm4_mode_transport.c - Transport mode encapsulation for IPv4. + * + * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/stringify.h> +#include <net/dst.h> +#include <net/ip.h> +#include <net/xfrm.h> + +/* Add encapsulation header. + * + * The IP header will be moved forward to make space for the encapsulation + * header. + * + * On exit, skb->h will be set to the start of the payload to be processed + * by x->type->output and skb->nh will be set to the top IP header. + */ +static int xfrm4_transport_output(struct sk_buff *skb) +{ + struct xfrm_state *x; + struct iphdr *iph; + int ihl; + + iph = skb->nh.iph; + skb->h.ipiph = iph; + + ihl = iph->ihl * 4; + skb->h.raw += ihl; + + x = skb->dst->xfrm; + skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl); + return 0; +} + +/* Remove encapsulation header. + * + * The IP header will be moved over the top of the encapsulation header. + * + * On entry, skb->h shall point to where the IP header should be and skb->nh + * shall be set to where the IP header currently is. skb->data shall point + * to the start of the payload. + */ +static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) +{ + int ihl = skb->data - skb->h.raw; + + if (skb->h.raw != skb->nh.raw) + skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl); + skb->nh.iph->tot_len = htons(skb->len + ihl); + skb->h.raw = skb->data; + return 0; +} + +static struct xfrm_mode xfrm4_transport_mode = { + .input = xfrm4_transport_input, + .output = xfrm4_transport_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_TRANSPORT, +}; + +static int __init xfrm4_transport_init(void) +{ + return xfrm_register_mode(&xfrm4_transport_mode, AF_INET); +} + +static void __exit xfrm4_transport_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm4_transport_mode, AF_INET); + BUG_ON(err); +} + +module_init(xfrm4_transport_init); +module_exit(xfrm4_transport_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TRANSPORT); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c new file mode 100644 index 000000000000..f8d880beb12f --- /dev/null +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -0,0 +1,125 @@ +/* + * xfrm4_mode_tunnel.c - Tunnel mode encapsulation for IPv4. + * + * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/stringify.h> +#include <net/dst.h> +#include <net/inet_ecn.h> +#include <net/ip.h> +#include <net/xfrm.h> + +static inline void ipip_ecn_decapsulate(struct sk_buff *skb) +{ + struct iphdr *outer_iph = skb->nh.iph; + struct iphdr *inner_iph = skb->h.ipiph; + + if (INET_ECN_is_ce(outer_iph->tos)) + IP_ECN_set_ce(inner_iph); +} + +/* Add encapsulation header. + * + * The top IP header will be constructed per RFC 2401. The following fields + * in it shall be filled in by x->type->output: + * tot_len + * check + * + * On exit, skb->h will be set to the start of the payload to be processed + * by x->type->output and skb->nh will be set to the top IP header. + */ +static int xfrm4_tunnel_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct xfrm_state *x = dst->xfrm; + struct iphdr *iph, *top_iph; + int flags; + + iph = skb->nh.iph; + skb->h.ipiph = iph; + + skb->nh.raw = skb_push(skb, x->props.header_len); + top_iph = skb->nh.iph; + + top_iph->ihl = 5; + top_iph->version = 4; + + /* DS disclosed */ + top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); + + flags = x->props.flags; + if (flags & XFRM_STATE_NOECN) + IP_ECN_clear(top_iph); + + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? + 0 : (iph->frag_off & htons(IP_DF)); + if (!top_iph->frag_off) + __ip_select_ident(top_iph, dst->child, 0); + + top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); + + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + top_iph->protocol = IPPROTO_IPIP; + + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + return 0; +} + +static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + int err = -EINVAL; + + if (iph->protocol != IPPROTO_IPIP) + goto out; + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto out; + + if (skb_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + goto out; + + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv4_copy_dscp(iph, skb->h.ipiph); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip_ecn_decapsulate(skb); + skb->mac.raw = memmove(skb->data - skb->mac_len, + skb->mac.raw, skb->mac_len); + skb->nh.raw = skb->data; + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + err = 0; + +out: + return err; +} + +static struct xfrm_mode xfrm4_tunnel_mode = { + .input = xfrm4_tunnel_input, + .output = xfrm4_tunnel_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_TUNNEL, +}; + +static int __init xfrm4_tunnel_init(void) +{ + return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET); +} + +static void __exit xfrm4_tunnel_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm4_tunnel_mode, AF_INET); + BUG_ON(err); +} + +module_init(xfrm4_tunnel_init); +module_exit(xfrm4_tunnel_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 4ef8efaf6a67..d16f863cf687 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -9,70 +9,15 @@ */ #include <linux/compiler.h> +#include <linux/if_ether.h> +#include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/netfilter_ipv4.h> -#include <net/inet_ecn.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/icmp.h> -/* Add encapsulation header. - * - * In transport mode, the IP header will be moved forward to make space - * for the encapsulation header. - * - * In tunnel mode, the top IP header will be constructed per RFC 2401. - * The following fields in it shall be filled in by x->type->output: - * tot_len - * check - * - * On exit, skb->h will be set to the start of the payload to be processed - * by x->type->output and skb->nh will be set to the top IP header. - */ -static void xfrm4_encap(struct sk_buff *skb) -{ - struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; - struct iphdr *iph, *top_iph; - int flags; - - iph = skb->nh.iph; - skb->h.ipiph = iph; - - skb->nh.raw = skb_push(skb, x->props.header_len); - top_iph = skb->nh.iph; - - if (!x->props.mode) { - skb->h.raw += iph->ihl*4; - memmove(top_iph, iph, iph->ihl*4); - return; - } - - top_iph->ihl = 5; - top_iph->version = 4; - - /* DS disclosed */ - top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); - - flags = x->props.flags; - if (flags & XFRM_STATE_NOECN) - IP_ECN_clear(top_iph); - - top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? - 0 : (iph->frag_off & htons(IP_DF)); - if (!top_iph->frag_off) - __ip_select_ident(top_iph, dst->child, 0); - - top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); - - top_iph->saddr = x->props.saddr.a4; - top_iph->daddr = x->id.daddr.a4; - top_iph->protocol = IPPROTO_IPIP; - - memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); -} - static int xfrm4_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; @@ -121,7 +66,9 @@ static int xfrm4_output_one(struct sk_buff *skb) if (err) goto error; - xfrm4_encap(skb); + err = x->mode->output(skb); + if (err) + goto error; err = x->type->output(x, skb); if (err) @@ -152,16 +99,10 @@ error_nolock: goto out_exit; } -static int xfrm4_output_finish(struct sk_buff *skb) +static int xfrm4_output_finish2(struct sk_buff *skb) { int err; -#ifdef CONFIG_NETFILTER - if (!skb->dst->xfrm) { - IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output(skb); - } -#endif while (likely((err = xfrm4_output_one(skb)) == 0)) { nf_reset(skb); @@ -174,7 +115,7 @@ static int xfrm4_output_finish(struct sk_buff *skb) return dst_output(skb); err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, - skb->dst->dev, xfrm4_output_finish); + skb->dst->dev, xfrm4_output_finish2); if (unlikely(err != 1)) break; } @@ -182,6 +123,48 @@ static int xfrm4_output_finish(struct sk_buff *skb) return err; } +static int xfrm4_output_finish(struct sk_buff *skb) +{ + struct sk_buff *segs; + +#ifdef CONFIG_NETFILTER + if (!skb->dst->xfrm) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } +#endif + + if (!skb_is_gso(skb)) + return xfrm4_output_finish2(skb); + + skb->protocol = htons(ETH_P_IP); + segs = skb_gso_segment(skb, 0); + kfree_skb(skb); + if (unlikely(IS_ERR(segs))) + return PTR_ERR(segs); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = xfrm4_output_finish2(segs); + + if (unlikely(err)) { + while ((segs = nskb)) { + nskb = segs->next; + segs->next = NULL; + kfree_skb(segs); + } + return err; + } + + segs = nskb; + } while (segs); + + return 0; +} + int xfrm4_output(struct sk_buff *skb) { return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8604c747bca5..8f50eae47d03 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -9,7 +9,6 @@ */ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/inetdevice.h> #include <net/xfrm.h> #include <net/ip.h> @@ -17,8 +16,6 @@ static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; -static struct xfrm_type_map xfrm4_type_map = { .lock = RW_LOCK_UNLOCKED }; - static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) { return __ip_route_output_key((struct rtable**)dst, fl); @@ -237,9 +234,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) static inline int xfrm4_garbage_collect(void) { - read_lock(&xfrm4_policy_afinfo.lock); xfrm4_policy_afinfo.garbage_collect(); - read_unlock(&xfrm4_policy_afinfo.lock); return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); } @@ -299,8 +294,6 @@ static struct dst_ops xfrm4_dst_ops = { static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .family = AF_INET, - .lock = RW_LOCK_UNLOCKED, - .type_map = &xfrm4_type_map, .dst_ops = &xfrm4_dst_ops, .dst_lookup = xfrm4_dst_lookup, .find_bundle = __xfrm4_find_bundle, diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index dbabf81a9b7b..81e1751c966e 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -131,7 +131,6 @@ __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, - .lock = RW_LOCK_UNLOCKED, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .state_lookup = __xfrm4_state_lookup, diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index f8a107ab5592..e923d4dea418 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -106,6 +106,26 @@ config INET6_TUNNEL tristate default n +config INET6_XFRM_MODE_TRANSPORT + tristate "IPv6: IPsec transport mode" + depends on IPV6 + default IPV6 + select XFRM + ---help--- + Support for IPsec transport mode. + + If unsure, say Y. + +config INET6_XFRM_MODE_TUNNEL + tristate "IPv6: IPsec tunnel mode" + depends on IPV6 + default IPV6 + select XFRM + ---help--- + Support for IPsec tunnel mode. + + If unsure, say Y. + config IPV6_TUNNEL tristate "IPv6: IPv6-in-IPv6 tunnel" select INET6_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index a760b0988fbb..386e0a626948 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -20,6 +20,8 @@ obj-$(CONFIG_INET6_ESP) += esp6.o obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o +obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o +obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 445006ee4522..2316a4315a18 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -40,7 +40,6 @@ * status etc. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -509,6 +508,26 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) kfree(ifp); } +static void +ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) +{ + struct inet6_ifaddr *ifa, **ifap; + int ifp_scope = ipv6_addr_src_scope(&ifp->addr); + + /* + * Each device address list is sorted in order of scope - + * global before linklocal. + */ + for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; + ifap = &ifa->if_next) { + if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) + break; + } + + ifp->if_next = *ifap; + *ifap = ifp; +} + /* On success it returns ifp with increased reference count */ static struct inet6_ifaddr * @@ -574,8 +593,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, write_lock(&idev->lock); /* Add to inet6_dev unicast addr list. */ - ifa->if_next = idev->addr_list; - idev->addr_list = ifa; + ipv6_link_dev_addr(idev, ifa); #ifdef CONFIG_IPV6_PRIVACY if (ifa->flags&IFA_F_TEMPORARY) { @@ -862,6 +880,8 @@ static int inline ipv6_saddr_label(const struct in6_addr *addr, int type) * 2002::/16 2 * ::/96 3 * ::ffff:0:0/96 4 + * fc00::/7 5 + * 2001::/32 6 */ if (type & IPV6_ADDR_LOOPBACK) return 0; @@ -869,8 +889,12 @@ static int inline ipv6_saddr_label(const struct in6_addr *addr, int type) return 3; else if (type & IPV6_ADDR_MAPPED) return 4; + else if (addr->s6_addr32[0] == htonl(0x20010000)) + return 6; else if (addr->s6_addr16[0] == htons(0x2002)) return 2; + else if ((addr->s6_addr[0] & 0xfe) == 0xfc) + return 5; return 1; } @@ -982,7 +1006,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; } else if (score.scope < hiscore.scope) { if (score.scope < daddr_scope) - continue; + break; /* addresses sorted by scope */ else { score.rule = 2; goto record_it; @@ -1069,6 +1093,9 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) continue; } +#else + if (hiscore.rule < 7) + hiscore.rule++; #endif /* Rule 8: Use longest matching prefix */ if (hiscore.rule < 8) { @@ -2860,6 +2887,11 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } +/* Maximum length of ifa_cacheinfo attributes */ +#define INET6_IFADDR_RTA_SPACE \ + RTA_SPACE(16) /* IFA_ADDRESS */ + \ + RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */ + static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, u32 pid, u32 seq, int event, unsigned int flags) { @@ -3092,7 +3124,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128); + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { @@ -3142,6 +3174,17 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, #endif } +/* Maximum length of ifinfomsg attributes */ +#define INET6_IFINFO_RTA_SPACE \ + RTA_SPACE(IFNAMSIZ) /* IFNAME */ + \ + RTA_SPACE(MAX_ADDR_LEN) /* ADDRESS */ + \ + RTA_SPACE(sizeof(u32)) /* MTU */ + \ + RTA_SPACE(sizeof(int)) /* LINK */ + \ + RTA_SPACE(0) /* PROTINFO */ + \ + RTA_SPACE(sizeof(u32)) /* FLAGS */ + \ + RTA_SPACE(sizeof(struct ifla_cacheinfo)) /* CACHEINFO */ + \ + RTA_SPACE(sizeof(__s32[DEVCONF_MAX])) /* CONF */ + static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 pid, u32 seq, int event, unsigned int flags) { @@ -3235,8 +3278,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) void inet6_ifinfo_notify(int event, struct inet6_dev *idev) { struct sk_buff *skb; - /* 128 bytes ?? */ - int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+128); + int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { @@ -3252,6 +3294,11 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); } +/* Maximum length of prefix_cacheinfo attributes */ +#define INET6_PREFIX_RTA_SPACE \ + RTA_SPACE(sizeof(((struct prefix_info *)NULL)->prefix)) /* ADDRESS */ + \ + RTA_SPACE(sizeof(struct prefix_cacheinfo)) /* CACHEINFO */ + static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, struct prefix_info *pinfo, u32 pid, u32 seq, int event, unsigned int flags) @@ -3296,7 +3343,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo) { struct sk_buff *skb; - int size = NLMSG_SPACE(sizeof(struct prefixmsg)+128); + int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e19457fe4f6e..5a0ba58b86cc 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -23,7 +23,6 @@ #include <linux/module.h> #include <linux/capability.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -660,8 +659,6 @@ int inet6_sk_rebuild_header(struct sock *sk) } ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); } return 0; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 6778173a3dda..9d4831bd4335 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -24,7 +24,6 @@ * This file is derived from net/ipv4/ah.c. */ -#include <linux/config.h> #include <linux/module.h> #include <net/ip.h> #include <net/ah.h> @@ -292,7 +291,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); memset(ah->auth_data, 0, ahp->icv_trunc_len); - skb_push(skb, skb->data - skb->nh.raw); + skb_push(skb, hdr_len); ahp->icv(ahp, skb, ah->auth_data); if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); @@ -301,12 +300,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) } } - skb->nh.raw = skb_pull(skb, ah_hlen); - memcpy(skb->nh.raw, tmp_hdr, hdr_len); - skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); - skb_pull(skb, hdr_len); - skb->h.raw = skb->data; - + skb->h.raw = memcpy(skb->nh.raw += ah_hlen, tmp_hdr, hdr_len); + __skb_pull(skb, ah_hlen + hdr_len); kfree(tmp_hdr); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 39ec528923f6..f6881d7a0385 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -14,7 +14,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 22f046079037..a278d5e862fe 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -24,7 +24,6 @@ * This file is derived from net/ipv4/esp.c */ -#include <linux/config.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -142,25 +141,17 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) int hdr_len = skb->h.raw - skb->nh.raw; int nfrags; - unsigned char *tmp_hdr = NULL; int ret = 0; if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr))) { ret = -EINVAL; - goto out_nofree; + goto out; } if (elen <= 0 || (elen & (blksize-1))) { ret = -EINVAL; - goto out_nofree; - } - - tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC); - if (!tmp_hdr) { - ret = -ENOMEM; - goto out_nofree; + goto out; } - memcpy(tmp_hdr, skb->nh.raw, hdr_len); /* If integrity check is required, do this. */ if (esp->auth.icv_full_len) { @@ -222,16 +213,12 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) /* ... check padding bits here. Silly. :-) */ pskb_trim(skb, skb->len - alen - padlen - 2); - skb->h.raw = skb_pull(skb, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen); - skb->nh.raw += sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; - memcpy(skb->nh.raw, tmp_hdr, hdr_len); - skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); ret = nexthdr[1]; } + skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - hdr_len; + out: - kfree(tmp_hdr); -out_nofree: return ret; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a18d4256372c..9d0ee7f0eeb5 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -179,7 +179,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) static struct inet6_protocol destopt_protocol = { .handler = ipv6_destopt_rcv, - .flags = INET6_PROTO_NOPOLICY, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; void __init ipv6_destopt_init(void) @@ -340,7 +340,7 @@ looped_back: static struct inet6_protocol rthdr_protocol = { .handler = ipv6_rthdr_rcv, - .flags = INET6_PROTO_NOPOLICY, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; void __init ipv6_rthdr_init(void) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index eb2865d5ae28..5c950cc79d80 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -13,7 +13,6 @@ * 2 of the License, or(at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/in6.h> #include <linux/ipv6.h> @@ -187,8 +186,6 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) } ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 2ae84c961678..d2f3fc990bfa 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -14,7 +14,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/random.h> diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2cb6149349bf..764221220afd 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -19,7 +19,6 @@ * remove ip6_null_entry from the top of * routing table. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/net.h> diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f9ca63912fbf..1d672b0547f2 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -10,7 +10,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aceee252503d..df8f051c0fce 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -84,14 +84,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt */ IP6CB(skb)->iif = skb->dst ? ((struct rt6_info *)skb->dst)->rt6i_idev->dev->ifindex : dev->ifindex; - if (skb->len < sizeof(struct ipv6hdr)) + if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) goto err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - goto drop; - } - hdr = skb->nh.ipv6h; if (hdr->version != 6) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e46048974f37..3bc74ce78800 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -28,7 +28,6 @@ * for datagram xmit */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -39,6 +38,7 @@ #include <linux/in6.h> #include <linux/tcp.h> #include <linux/route.h> +#include <linux/module.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> @@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *skb) int ip6_output(struct sk_buff *skb) { - if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) || + if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) || dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); else @@ -229,7 +229,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb->priority = sk->sk_priority; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || ipfragok) { + if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); @@ -458,6 +458,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) nf_bridge_get(to->nf_bridge); #endif #endif + skb_copy_secmark(to, from); } int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) @@ -488,6 +489,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } +EXPORT_SYMBOL_GPL(ip6_find_1stfragopt); static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { @@ -830,8 +832,9 @@ static inline int ip6_ufo_append_data(struct sock *sk, struct frag_hdr fhdr; /* specify the length of each IP datagram fragment*/ - skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) - - sizeof(struct frag_hdr); + skb_shinfo(skb)->gso_size = mtu - fragheaderlen - + sizeof(struct frag_hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; ipv6_select_ident(skb, &fhdr); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a995796b5a57..bc77c0e1a943 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -19,7 +19,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 48636436028a..7e4d1c17bfbc 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -30,7 +30,6 @@ * The decompression of IP datagram MUST be done after the reassembly, * AH/ESP processing. */ -#include <linux/config.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -65,38 +64,25 @@ static LIST_HEAD(ipcomp6_tfms_list); static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) { - int err = 0; - u8 nexthdr = 0; - int hdr_len = skb->h.raw - skb->nh.raw; - unsigned char *tmp_hdr = NULL; + int err = -ENOMEM; struct ipv6hdr *iph; + struct ipv6_comp_hdr *ipch; int plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; struct crypto_tfm *tfm; int cpu; - if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && - skb_linearize(skb, GFP_ATOMIC) != 0) { - err = -ENOMEM; + if (skb_linearize_cow(skb)) goto out; - } skb->ip_summed = CHECKSUM_NONE; /* Remove ipcomp header and decompress original payload */ iph = skb->nh.ipv6h; - tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC); - if (!tmp_hdr) - goto out; - memcpy(tmp_hdr, iph, hdr_len); - nexthdr = *(u8 *)skb->data; - skb_pull(skb, sizeof(struct ipv6_comp_hdr)); - skb->nh.raw += sizeof(struct ipv6_comp_hdr); - memcpy(skb->nh.raw, tmp_hdr, hdr_len); - iph = skb->nh.ipv6h; - iph->payload_len = htons(ntohs(iph->payload_len) - sizeof(struct ipv6_comp_hdr)); - skb->h.raw = skb->data; + ipch = (void *)skb->data; + skb->h.raw = skb->nh.raw + sizeof(*ipch); + __skb_pull(skb, sizeof(*ipch)); /* decompression */ plen = skb->len; @@ -123,20 +109,14 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out_put_cpu; } - skb_put(skb, dlen - plen); + skb->truesize += dlen - plen; + __skb_put(skb, dlen - plen); memcpy(skb->data, scratch, dlen); + err = ipch->nexthdr; - iph = skb->nh.ipv6h; - iph->payload_len = htons(skb->len); - out_put_cpu: put_cpu(); out: - kfree(tmp_hdr); - if (err) - goto error_out; - return nexthdr; -error_out: return err; } @@ -159,10 +139,8 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) goto out_ok; } - if ((skb_is_nonlinear(skb) || skb_cloned(skb)) && - skb_linearize(skb, GFP_ATOMIC) != 0) { + if (skb_linearize_cow(skb)) goto out_ok; - } /* compression */ plen = skb->len - hdr_len; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4c20eeb3d568..43327264e69c 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -27,7 +27,6 @@ #include <linux/module.h> #include <linux/capability.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -58,9 +57,113 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; +static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, + int proto) +{ + struct inet6_protocol *ops = NULL; + + for (;;) { + struct ipv6_opt_hdr *opth; + int len; + + if (proto != NEXTHDR_HOP) { + ops = rcu_dereference(inet6_protos[proto]); + + if (unlikely(!ops)) + break; + + if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) + break; + } + + if (unlikely(!pskb_may_pull(skb, 8))) + break; + + opth = (void *)skb->data; + len = opth->hdrlen * 8 + 8; + + if (unlikely(!pskb_may_pull(skb, len))) + break; + + proto = opth->nexthdr; + __skb_pull(skb, len); + } + + return ops; +} + +static int ipv6_gso_send_check(struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + int err = -EINVAL; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = skb->nh.ipv6h; + __skb_pull(skb, sizeof(*ipv6h)); + err = -EPROTONOSUPPORT; + + rcu_read_lock(); + ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (likely(ops && ops->gso_send_check)) { + skb->h.raw = skb->data; + err = ops->gso_send_check(skb); + } + rcu_read_unlock(); + +out: + return err; +} + +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + 0))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = skb->nh.ipv6h; + __skb_pull(skb, sizeof(*ipv6h)); + segs = ERR_PTR(-EPROTONOSUPPORT); + + rcu_read_lock(); + ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (likely(ops && ops->gso_segment)) { + skb->h.raw = skb->data; + segs = ops->gso_segment(skb, features); + } + rcu_read_unlock(); + + if (unlikely(IS_ERR(segs))) + goto out; + + for (skb = segs; skb; skb = skb->next) { + ipv6h = skb->nh.ipv6h; + ipv6h->payload_len = htons(skb->len - skb->mac_len - + sizeof(*ipv6h)); + } + +out: + return segs; +} + static struct packet_type ipv6_packet_type = { .type = __constant_htons(ETH_P_IPV6), .func = ipv6_rcv, + .gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, }; struct ip6_ra_chain *ip6_ra_chain; diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 16482785bdfd..dd4d1ce77769 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -1,5 +1,4 @@ -#include <linux/config.h> #include <linux/module.h> #include <net/protocol.h> #include <net/ipv6.h> diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6e871afbb2c7..9d697d4dcffc 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -28,7 +28,6 @@ * - MLDv2 support */ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index dfa20d3be9b6..b50055b9278d 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -48,7 +48,6 @@ #endif #include <linux/module.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index b4b7d441af25..968a14be0d05 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -505,7 +505,7 @@ ipq_rcv_skb(struct sk_buff *skb) if (type <= IPQM_BASE) return; - if (security_netlink_recv(skb)) + if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); write_lock_bh(&queue_lock); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e72f89a7019..f26898b00347 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -19,13 +19,13 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/in.h> #include <linux/skbuff.h> #include <linux/kmod.h> #include <linux/vmalloc.h> #include <linux/netdevice.h> #include <linux/module.h> +#include <linux/poison.h> #include <linux/icmpv6.h> #include <net/ipv6.h> #include <asm/uaccess.h> @@ -377,7 +377,7 @@ ip6t_do_table(struct sk_buff **pskb, } while (!hotdrop); #ifdef CONFIG_NETFILTER_DEBUG - ((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac; + ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif read_unlock_bh(&table->lock); @@ -1281,7 +1281,8 @@ int ip6t_register_table(struct xt_table *table, return ret; } - if (xt_register_table(table, &bootstrap, newinfo) != 0) { + ret = xt_register_table(table, &bootstrap, newinfo); + if (ret != 0) { xt_free_table_info(newinfo); return ret; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index de1175c27f6d..8629ba195d2d 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -15,7 +15,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/icmpv6.h> diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 93bae36f2663..c2ab38ff46af 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -20,7 +20,6 @@ * structures. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/ipv6.h> #include <linux/in6.h> @@ -189,7 +188,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(*pskb, &ctinfo); - if (!ct) + if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) goto out; help = nfct_help(ct); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 86c6703265d0..ef18a7b7014b 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -233,7 +233,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, return -NF_ACCEPT; } - if (hooknum == NF_IP6_PRE_ROUTING && + if (nf_conntrack_checksum && hooknum == NF_IP6_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed\n"); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3e319035f82d..00d5583807f7 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -14,7 +14,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -456,13 +455,9 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, DEBUGP("queue: message is too short.\n"); goto err; } - if (end-offset < skb->len) { - if (pskb_trim(skb, end - offset)) { - DEBUGP("Can't trim\n"); - goto err; - } - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; + if (pskb_trim_rcsum(skb, end - offset)) { + DEBUGP("Can't trim\n"); + goto err; } /* Find out which fragments are in front and at the back of us diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 779ddf77f4d4..efee7a6301a8 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -17,7 +17,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/sched.h> #include <linux/socket.h> #include <linux/net.h> diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index eef985e010ea..4e299c69e1c6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -28,7 +28,6 @@ * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8a777932786d..87c39c978cd0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -25,7 +25,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/times.h> @@ -349,7 +348,7 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif, (strict & RT6_SELECT_F_REACHABLE) && last && last != rt0) { /* no entries matched; do round-robin */ - static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(lock); spin_lock(&lock); *head = rt0->u.next; rt0->u.next = last->u.next; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6578c3080f47..c56aeece2bf5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -18,7 +18,6 @@ * Nate Thompson <nate@thebog.net>: 6to4 support */ -#include <linux/config.h> #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 8eff9fa1e983..7a4639db1346 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -7,7 +7,6 @@ #include <linux/mm.h> #include <linux/sysctl.h> -#include <linux/config.h> #include <linux/in6.h> #include <linux/ipv6.h> #include <net/ndisc.h> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 301eee726b0f..923989d0520d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -26,7 +26,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -270,9 +269,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; + sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -554,6 +552,24 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) } } +static int tcp_v6_gso_send_check(struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + ipv6h = skb->nh.ipv6h; + th = skb->h.th; + + th->check = 0; + th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, + IPPROTO_TCP, 0); + skb->csum = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_HW; + return 0; +} static void tcp_v6_send_reset(struct sk_buff *skb) { @@ -930,9 +946,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * comment in that function for the gory details. -acme */ + sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(newsk, dst, NULL); - newsk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; @@ -1218,8 +1233,16 @@ process: bh_lock_sock(sk); ret = 0; if (!sock_owned_by_user(sk)) { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v6_do_rcv(sk, skb); +#ifdef CONFIG_NET_DMA + struct tcp_sock *tp = tcp_sk(sk); + if (tp->ucopy.dma_chan) + ret = tcp_v6_do_rcv(sk, skb); + else +#endif + { + if (!tcp_prequeue(sk, skb)) + ret = tcp_v6_do_rcv(sk, skb); + } } else sk_add_backlog(sk, skb); bh_unlock_sock(sk); @@ -1461,7 +1484,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, sp->sk_state, - tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, + tp->write_seq-tp->snd_una, + (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, @@ -1597,6 +1621,8 @@ struct proto tcpv6_prot = { static struct inet6_protocol tcpv6_protocol = { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, + .gso_send_check = tcp_v6_gso_send_check, + .gso_segment = tcp_tso_segment, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8d3432a70f3a..ccc57f434cd3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -23,7 +23,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 00cfdee18dca..0405d74ff910 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -13,21 +13,9 @@ #include <linux/string.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> -#include <net/dsfield.h> -#include <net/inet_ecn.h> -#include <net/ip.h> #include <net/ipv6.h> #include <net/xfrm.h> -static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) -{ - struct ipv6hdr *outer_iph = skb->nh.ipv6h; - struct ipv6hdr *inner_iph = skb->h.ipv6h; - - if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) - IP6_ECN_set_ce(inner_iph); -} - int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) { int err; @@ -81,21 +69,10 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) xfrm_vec[xfrm_nr++] = x; + if (x->mode->input(x, skb)) + goto drop; + if (x->props.mode) { /* XXX */ - if (nexthdr != IPPROTO_IPV6) - goto drop; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto drop; - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - goto drop; - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip6_ecn_decapsulate(skb); - skb->mac.raw = memmove(skb->data - skb->mac_len, - skb->mac.raw, skb->mac_len); - skb->nh.raw = skb->data; decaps = 1; break; } diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c new file mode 100644 index 000000000000..711d713e36d8 --- /dev/null +++ b/net/ipv6/xfrm6_mode_transport.c @@ -0,0 +1,88 @@ +/* + * xfrm6_mode_transport.c - Transport mode encapsulation for IPv6. + * + * Copyright (C) 2002 USAGI/WIDE Project + * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/stringify.h> +#include <net/dst.h> +#include <net/ipv6.h> +#include <net/xfrm.h> + +/* Add encapsulation header. + * + * The IP header and mutable extension headers will be moved forward to make + * space for the encapsulation header. + * + * On exit, skb->h will be set to the start of the encapsulation header to be + * filled in by x->type->output and skb->nh will be set to the nextheader field + * of the extension header directly preceding the encapsulation header, or in + * its absence, that of the top IP header. The value of skb->data will always + * point to the top IP header. + */ +static int xfrm6_transport_output(struct sk_buff *skb) +{ + struct xfrm_state *x = skb->dst->xfrm; + struct ipv6hdr *iph; + u8 *prevhdr; + int hdr_len; + + skb_push(skb, x->props.header_len); + iph = skb->nh.ipv6h; + + hdr_len = ip6_find_1stfragopt(skb, &prevhdr); + skb->nh.raw = prevhdr - x->props.header_len; + skb->h.raw = skb->data + hdr_len; + memmove(skb->data, iph, hdr_len); + return 0; +} + +/* Remove encapsulation header. + * + * The IP header will be moved over the top of the encapsulation header. + * + * On entry, skb->h shall point to where the IP header should be and skb->nh + * shall be set to where the IP header currently is. skb->data shall point + * to the start of the payload. + */ +static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) +{ + int ihl = skb->data - skb->h.raw; + + if (skb->h.raw != skb->nh.raw) + skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl); + skb->nh.ipv6h->payload_len = htons(skb->len + ihl - + sizeof(struct ipv6hdr)); + skb->h.raw = skb->data; + return 0; +} + +static struct xfrm_mode xfrm6_transport_mode = { + .input = xfrm6_transport_input, + .output = xfrm6_transport_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_TRANSPORT, +}; + +static int __init xfrm6_transport_init(void) +{ + return xfrm_register_mode(&xfrm6_transport_mode, AF_INET6); +} + +static void __exit xfrm6_transport_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm6_transport_mode, AF_INET6); + BUG_ON(err); +} + +module_init(xfrm6_transport_init); +module_exit(xfrm6_transport_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TRANSPORT); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c new file mode 100644 index 000000000000..8af79be2edca --- /dev/null +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -0,0 +1,121 @@ +/* + * xfrm6_mode_tunnel.c - Tunnel mode encapsulation for IPv6. + * + * Copyright (C) 2002 USAGI/WIDE Project + * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/stringify.h> +#include <net/dsfield.h> +#include <net/dst.h> +#include <net/inet_ecn.h> +#include <net/ipv6.h> +#include <net/xfrm.h> + +static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) +{ + struct ipv6hdr *outer_iph = skb->nh.ipv6h; + struct ipv6hdr *inner_iph = skb->h.ipv6h; + + if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph))) + IP6_ECN_set_ce(inner_iph); +} + +/* Add encapsulation header. + * + * The top IP header will be constructed per RFC 2401. The following fields + * in it shall be filled in by x->type->output: + * payload_len + * + * On exit, skb->h will be set to the start of the encapsulation header to be + * filled in by x->type->output and skb->nh will be set to the nextheader field + * of the extension header directly preceding the encapsulation header, or in + * its absence, that of the top IP header. The value of skb->data will always + * point to the top IP header. + */ +static int xfrm6_tunnel_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct xfrm_state *x = dst->xfrm; + struct ipv6hdr *iph, *top_iph; + int dsfield; + + skb_push(skb, x->props.header_len); + iph = skb->nh.ipv6h; + + skb->nh.raw = skb->data; + top_iph = skb->nh.ipv6h; + skb->nh.raw = &top_iph->nexthdr; + skb->h.ipv6h = top_iph + 1; + + top_iph->version = 6; + top_iph->priority = iph->priority; + top_iph->flow_lbl[0] = iph->flow_lbl[0]; + top_iph->flow_lbl[1] = iph->flow_lbl[1]; + top_iph->flow_lbl[2] = iph->flow_lbl[2]; + dsfield = ipv6_get_dsfield(top_iph); + dsfield = INET_ECN_encapsulate(dsfield, dsfield); + if (x->props.flags & XFRM_STATE_NOECN) + dsfield &= ~INET_ECN_MASK; + ipv6_change_dsfield(top_iph, 0, dsfield); + top_iph->nexthdr = IPPROTO_IPV6; + top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); + ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); + ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); + return 0; +} + +static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = -EINVAL; + + if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6) + goto out; + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto out; + + if (skb_cloned(skb) && + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + goto out; + + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(skb); + skb->mac.raw = memmove(skb->data - skb->mac_len, + skb->mac.raw, skb->mac_len); + skb->nh.raw = skb->data; + err = 0; + +out: + return err; +} + +static struct xfrm_mode xfrm6_tunnel_mode = { + .input = xfrm6_tunnel_input, + .output = xfrm6_tunnel_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_TUNNEL, +}; + +static int __init xfrm6_tunnel_init(void) +{ + return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6); +} + +static void __exit xfrm6_tunnel_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm6_tunnel_mode, AF_INET6); + BUG_ON(err); +} + +module_init(xfrm6_tunnel_init); +module_exit(xfrm6_tunnel_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 80242172a5df..0eea60ea9ebc 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -14,68 +14,9 @@ #include <linux/spinlock.h> #include <linux/icmpv6.h> #include <linux/netfilter_ipv6.h> -#include <net/dsfield.h> -#include <net/inet_ecn.h> #include <net/ipv6.h> #include <net/xfrm.h> -/* Add encapsulation header. - * - * In transport mode, the IP header and mutable extension headers will be moved - * forward to make space for the encapsulation header. - * - * In tunnel mode, the top IP header will be constructed per RFC 2401. - * The following fields in it shall be filled in by x->type->output: - * payload_len - * - * On exit, skb->h will be set to the start of the encapsulation header to be - * filled in by x->type->output and skb->nh will be set to the nextheader field - * of the extension header directly preceding the encapsulation header, or in - * its absence, that of the top IP header. The value of skb->data will always - * point to the top IP header. - */ -static void xfrm6_encap(struct sk_buff *skb) -{ - struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; - struct ipv6hdr *iph, *top_iph; - int dsfield; - - skb_push(skb, x->props.header_len); - iph = skb->nh.ipv6h; - - if (!x->props.mode) { - u8 *prevhdr; - int hdr_len; - - hdr_len = ip6_find_1stfragopt(skb, &prevhdr); - skb->nh.raw = prevhdr - x->props.header_len; - skb->h.raw = skb->data + hdr_len; - memmove(skb->data, iph, hdr_len); - return; - } - - skb->nh.raw = skb->data; - top_iph = skb->nh.ipv6h; - skb->nh.raw = &top_iph->nexthdr; - skb->h.ipv6h = top_iph + 1; - - top_iph->version = 6; - top_iph->priority = iph->priority; - top_iph->flow_lbl[0] = iph->flow_lbl[0]; - top_iph->flow_lbl[1] = iph->flow_lbl[1]; - top_iph->flow_lbl[2] = iph->flow_lbl[2]; - dsfield = ipv6_get_dsfield(top_iph); - dsfield = INET_ECN_encapsulate(dsfield, dsfield); - if (x->props.flags & XFRM_STATE_NOECN) - dsfield &= ~INET_ECN_MASK; - ipv6_change_dsfield(top_iph, 0, dsfield); - top_iph->nexthdr = IPPROTO_IPV6; - top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); - ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); - ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); -} - static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; @@ -118,7 +59,9 @@ static int xfrm6_output_one(struct sk_buff *skb) if (err) goto error; - xfrm6_encap(skb); + err = x->mode->output(skb); + if (err) + goto error; err = x->type->output(x, skb); if (err) @@ -151,7 +94,7 @@ error_nolock: goto out_exit; } -static int xfrm6_output_finish(struct sk_buff *skb) +static int xfrm6_output_finish2(struct sk_buff *skb) { int err; @@ -167,7 +110,7 @@ static int xfrm6_output_finish(struct sk_buff *skb) return dst_output(skb); err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, - skb->dst->dev, xfrm6_output_finish); + skb->dst->dev, xfrm6_output_finish2); if (unlikely(err != 1)) break; } @@ -175,6 +118,41 @@ static int xfrm6_output_finish(struct sk_buff *skb) return err; } +static int xfrm6_output_finish(struct sk_buff *skb) +{ + struct sk_buff *segs; + + if (!skb_is_gso(skb)) + return xfrm6_output_finish2(skb); + + skb->protocol = htons(ETH_P_IP); + segs = skb_gso_segment(skb, 0); + kfree_skb(skb); + if (unlikely(IS_ERR(segs))) + return PTR_ERR(segs); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = xfrm6_output_finish2(segs); + + if (unlikely(err)) { + while ((segs = nskb)) { + nskb = segs->next; + segs->next = NULL; + kfree_skb(segs); + } + return err; + } + + segs = nskb; + } while (segs); + + return 0; +} + int xfrm6_output(struct sk_buff *skb) { return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 88c840f1beb6..73cd250aecbb 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -12,7 +12,6 @@ */ #include <linux/compiler.h> -#include <linux/config.h> #include <linux/netdevice.h> #include <net/addrconf.h> #include <net/xfrm.h> @@ -23,8 +22,6 @@ static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static struct xfrm_type_map xfrm6_type_map = { .lock = RW_LOCK_UNLOCKED }; - static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) { int err = 0; @@ -249,9 +246,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) static inline int xfrm6_garbage_collect(void) { - read_lock(&xfrm6_policy_afinfo.lock); xfrm6_policy_afinfo.garbage_collect(); - read_unlock(&xfrm6_policy_afinfo.lock); return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); } @@ -311,8 +306,6 @@ static struct dst_ops xfrm6_dst_ops = { static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, - .lock = RW_LOCK_UNLOCKED, - .type_map = &xfrm6_type_map, .dst_ops = &xfrm6_dst_ops, .dst_lookup = xfrm6_dst_lookup, .find_bundle = __xfrm6_find_bundle, diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index a5723024d3b3..b33296b3f6de 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -135,7 +135,6 @@ __xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, - .lock = RW_LOCK_UNLOCKED, .init_tempsel = __xfrm6_init_tempsel, .state_lookup = __xfrm6_state_lookup, .find_acq = __xfrm6_find_acq, diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index d37768e5064f..6b44fe8516c3 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -21,7 +21,6 @@ * Based on net/ipv4/xfrm4_tunnel.c * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/xfrm.h> #include <linux/list.h> diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 811d998725bc..aa34ff4b707c 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -28,7 +28,6 @@ * See net/ipx/ChangeLog. */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/if_arp.h> diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 1f73d9ea434d..4c0c71206e54 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -4,7 +4,6 @@ * Copyright(C) Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 2002 */ -#include <linux/config.h> #include <linux/init.h> #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index a394c6fe19a2..a30dbb1e08fb 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -7,7 +7,6 @@ * See net/ipx/ChangeLog. */ -#include <linux/config.h> #include <linux/list.h> #include <linux/route.h> #include <linux/spinlock.h> @@ -238,7 +237,7 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, } /* Apply checksum. Not allowed on 802.3 links. */ - if (sk->sk_no_check || intrfc->if_dlink_type == IPX_FRAME_8023) + if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023)) ipx->ipx_checksum = 0xFFFF; else ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr)); diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c index 510eda96d10a..fa574735c76f 100644 --- a/net/ipx/sysctl_net_ipx.c +++ b/net/ipx/sysctl_net_ipx.c @@ -6,7 +6,6 @@ * Added /proc/sys/net/ipx/ipx_pprop_broadcasting - acme March 4, 2001 */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 2f37c9f35e27..7fae48a53bff 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -42,7 +42,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index 286881978858..9c4a902a9dba 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -29,7 +29,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/proc_fs.h> diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 6f20b4206e08..b400f27851fc 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -30,7 +30,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/init.h> #include <linux/module.h> #include <linux/fs.h> @@ -124,7 +123,6 @@ static int __init ircomm_tty_init(void) driver->owner = THIS_MODULE; driver->driver_name = "ircomm"; driver->name = "ircomm"; - driver->devfs_name = "ircomm"; driver->major = IRCOMM_TTY_MAJOR; driver->minor_start = IRCOMM_TTY_MINOR; driver->type = TTY_DRIVER_TYPE_SERIAL; diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index e3debbdb67f5..ba40e5495f58 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -29,7 +29,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/string.h> #include <linux/proc_fs.h> #include <linux/skbuff.h> diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 2d2e2b1919f4..a0472652a44e 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/skbuff.h> diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c index f8e6cb0db04b..95cf1234ea17 100644 --- a/net/irda/irlan/irlan_client.c +++ b/net/irda/irlan/irlan_client.c @@ -173,13 +173,14 @@ void irlan_client_discovery_indication(discinfo_t *discovery, rcu_read_lock(); self = irlan_get_any(); if (self) { - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); + IRDA_ASSERT(self->magic == IRLAN_MAGIC, goto out;); IRDA_DEBUG(1, "%s(), Found instance (%08x)!\n", __FUNCTION__ , daddr); irlan_client_wakeup(self, saddr, daddr); } +IRDA_ASSERT_LABEL(out:) rcu_read_unlock(); } diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 657d12210578..bd659dd545ac 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -23,7 +23,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 953e255d2bc8..b0ccc455b747 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -25,7 +25,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/inetdevice.h> diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 7029618f5719..cade355ac8af 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -29,7 +29,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/skbuff.h> @@ -884,7 +883,8 @@ static void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now) if (now) { /* Send down empty frame to trigger speed change */ skb = dev_alloc_skb(0); - irlap_queue_xmit(self, skb); + if (skb) + irlap_queue_xmit(self, skb); } } diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index a505b5457608..99faff68c399 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -25,7 +25,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/delay.h> diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index c19e9ce05a3a..129ad64c15bb 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> @@ -44,6 +43,8 @@ #include <net/irda/irlmp.h> #include <net/irda/irlmp_frame.h> +#include <asm/unaligned.h> + static __u8 irlmp_find_free_slsap(void); static int irlmp_slsap_inuse(__u8 slsap_sel); @@ -840,6 +841,7 @@ void irlmp_do_expiry(void) void irlmp_do_discovery(int nslots) { struct lap_cb *lap; + __u16 *data_hintsp; /* Make sure the value is sane */ if ((nslots != 1) && (nslots != 6) && (nslots != 8) && (nslots != 16)){ @@ -849,7 +851,8 @@ void irlmp_do_discovery(int nslots) } /* Construct new discovery info to be used by IrLAP, */ - u16ho(irlmp->discovery_cmd.data.hints) = irlmp->hints.word; + data_hintsp = (__u16 *) irlmp->discovery_cmd.data.hints; + put_unaligned(irlmp->hints.word, data_hintsp); /* * Set character set for device name (we use ASCII), and diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c index 26649f6528e6..4c90dd1b4503 100644 --- a/net/irda/irlmp_event.c +++ b/net/irda/irlmp_event.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/kernel.h> #include <net/irda/irda.h> diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c index 91cd268172fa..39761a1d18f5 100644 --- a/net/irda/irlmp_frame.c +++ b/net/irda/irlmp_frame.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/skbuff.h> #include <linux/kernel.h> diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 634901dd156f..2869b16e417d 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -31,7 +31,6 @@ * Jean II */ -#include <linux/config.h> #include <linux/module.h> #include <linux/moduleparam.h> diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index e4fe1e80029c..80887528e77e 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -244,12 +244,10 @@ #include <linux/skbuff.h> #include <linux/tty.h> #include <linux/proc_fs.h> -#include <linux/devfs_fs_kernel.h> #include <linux/netdevice.h> #include <linux/miscdevice.h> #include <linux/poll.h> #include <linux/capability.h> -#include <linux/config.h> #include <linux/ctype.h> /* isspace() */ #include <asm/uaccess.h> #include <linux/init.h> diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 1b1c4193359a..86805c3d8324 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -23,7 +23,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/mm.h> #include <linux/ctype.h> #include <linux/sysctl.h> diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 8aff254cb418..49c51c5f1a86 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -24,7 +24,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/seq_file.h> diff --git a/net/irda/qos.c b/net/irda/qos.c index ddfb5c502a90..95a69c013ee8 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -30,7 +30,6 @@ * ********************************************************************/ -#include <linux/config.h> #include <asm/byteorder.h> #include <net/irda/irda.h> diff --git a/net/irda/timer.c b/net/irda/timer.c index 0e17f976add6..3871a2b911f9 100644 --- a/net/irda/timer.c +++ b/net/irda/timer.c @@ -25,7 +25,6 @@ ********************************************************************/ #include <asm/system.h> -#include <linux/config.h> #include <linux/delay.h> #include <net/irda/timer.h> diff --git a/net/key/af_key.c b/net/key/af_key.c index 859582275cab..3a95b2ee4690 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -14,7 +14,6 @@ * Derek Atkins <derek@ihtfp.com> */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/kernel.h> @@ -1454,21 +1453,23 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (x == NULL) return -ESRCH; + if ((err = security_xfrm_state_delete(x))) + goto out; + if (xfrm_state_kern(x)) { - xfrm_state_put(x); - return -EPERM; + err = -EPERM; + goto out; } err = xfrm_state_delete(x); - if (err < 0) { - xfrm_state_put(x); - return err; - } + if (err < 0) + goto out; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); +out: xfrm_state_put(x); return err; @@ -2274,11 +2275,14 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg err = 0; + if ((err = security_xfrm_policy_delete(xp))) + goto out; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); +out: xfrm_pol_put(xp); return err; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 5a04db745c8d..d6cfe84d521b 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -20,7 +20,6 @@ * * See the GNU General Public License for more details. */ -#include <linux/config.h> #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/module.h> @@ -674,7 +673,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, lock_sock(sk); copied = -ENOTCONN; - if (sk->sk_state == TCP_LISTEN) + if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) goto out; timeo = sock_rcvtimeo(sk, nonblock); @@ -733,7 +732,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_shutdown & RCV_SHUTDOWN) break; - if (sk->sk_state == TCP_CLOSE) { + if (sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_CLOSE) { if (!sock_flag(sk, SOCK_DONE)) { /* * This occurs when user tries to read @@ -789,7 +788,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, continue; if (!(flags & MSG_PEEK)) { - sk_eat_skb(sk, skb); + sk_eat_skb(sk, skb, 0); *seq = 0; } } while (len > 0); diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index ba90f7f0801a..a89917130a7b 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -11,7 +11,6 @@ * * See the GNU General Public License for more details. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> @@ -26,8 +25,6 @@ #include <net/llc_c_st.h> #include <net/tcp_states.h> -u8 llc_mac_null_var[IFHWADDRLEN]; - /** * llc_build_and_send_pkt - Connection data sending for upper layers. * @sk: connection diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index d62e0f9b9da3..94d2368ade92 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -142,6 +142,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, struct llc_sap *sap; struct llc_pdu_sn *pdu; int dest; + int (*rcv)(struct sk_buff *, struct net_device *, + struct packet_type *, struct net_device *); /* * When the interface is in promisc. mode, drop all the crap that it @@ -169,9 +171,11 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, * First the upper layer protocols that don't need the full * LLC functionality */ - if (sap->rcv_func) { - sap->rcv_func(skb, dev, pt, orig_dev); - goto out_put; + rcv = rcu_dereference(sap->rcv_func); + if (rcv) { + struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC); + if (cskb) + rcv(cskb, dev, pt, orig_dev); } dest = llc_pdu_type(skb); if (unlikely(!dest || !llc_type_handlers[dest - 1])) diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index bd531cb235a7..19308fece3ad 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -12,7 +12,6 @@ * See the GNU General Public License for more details. */ -#include <linux/config.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/proc_fs.h> diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 4029ceee9b91..20c4eb5c1ac6 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -282,7 +282,7 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb) * mac, and local sap. Returns pointer for socket found, %NULL otherwise. */ static struct sock *llc_lookup_dgram(struct llc_sap *sap, - struct llc_addr *laddr) + const struct llc_addr *laddr) { struct sock *rc; struct hlist_node *node; @@ -304,19 +304,62 @@ found: return rc; } +/** + * llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets. + * @sap: SAP + * @laddr: address of local LLC (MAC + SAP) + * + * Search socket list of the SAP and finds connections with same sap. + * Deliver clone to each. + */ +static void llc_sap_mcast(struct llc_sap *sap, + const struct llc_addr *laddr, + struct sk_buff *skb) +{ + struct sock *sk; + struct hlist_node *node; + + read_lock_bh(&sap->sk_list.lock); + sk_for_each(sk, node, &sap->sk_list.list) { + struct llc_sock *llc = llc_sk(sk); + struct sk_buff *skb1; + + if (sk->sk_type != SOCK_DGRAM) + continue; + + if (llc->laddr.lsap != laddr->lsap) + continue; + + skb1 = skb_clone(skb, GFP_ATOMIC); + if (!skb1) + break; + + sock_hold(sk); + skb_set_owner_r(skb1, sk); + llc_sap_rcv(sap, skb1); + sock_put(sk); + } + read_unlock_bh(&sap->sk_list.lock); +} + + void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) { struct llc_addr laddr; - struct sock *sk; llc_pdu_decode_da(skb, laddr.mac); llc_pdu_decode_dsap(skb, &laddr.lsap); - sk = llc_lookup_dgram(sap, &laddr); - if (sk) { - skb_set_owner_r(skb, sk); - llc_sap_rcv(sap, skb); - sock_put(sk); - } else + if (llc_mac_multicast(laddr.mac)) { + llc_sap_mcast(sap, &laddr, skb); kfree_skb(skb); + } else { + struct sock *sk = llc_lookup_dgram(sap, &laddr); + if (sk) { + skb_set_owner_r(skb, sk); + llc_sap_rcv(sap, skb); + sock_put(sk); + } else + kfree_skb(skb); + } } diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index f37dbf8ef126..8275bd33bd9d 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -11,7 +11,6 @@ * * See the GNU General Public License for more details. */ -#include <linux/config.h> #include <linux/init.h> #include <linux/module.h> #include <net/llc.h> diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index d1eaddb13633..45d7dd92a088 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -4,7 +4,6 @@ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/sysctl.h> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e2893effdfaa..42a178aa30f9 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -60,6 +60,18 @@ config NF_CONNTRACK_MARK of packets, but this mark value is kept in the conntrack session instead of the individual packets. +config NF_CONNTRACK_SECMARK + bool 'Connection tracking security mark support' + depends on NF_CONNTRACK && NETWORK_SECMARK + help + This option enables security markings to be applied to + connections. Typically they are copied to connections from + packets using the CONNSECMARK target and copied back from + connections to packets with the same target, with the packets + being originally labeled via SECMARK. + + If unsure, say 'N'. + config NF_CONNTRACK_EVENTS bool "Connection tracking events (EXPERIMENTAL)" depends on EXPERIMENTAL && NF_CONNTRACK @@ -174,6 +186,26 @@ config NETFILTER_XT_TARGET_NOTRACK If you want to compile it as a module, say M here and read <file:Documentation/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_TARGET_SECMARK + tristate '"SECMARK" target support' + depends on NETFILTER_XTABLES && NETWORK_SECMARK + help + The SECMARK target allows security marking of network + packets, for use with security subsystems. + + To compile it as a module, choose M here. If unsure, say N. + +config NETFILTER_XT_TARGET_CONNSECMARK + tristate '"CONNSECMARK" target support' + depends on NETFILTER_XTABLES && (NF_CONNTRACK_SECMARK || IP_NF_CONNTRACK_SECMARK) + help + The CONNSECMARK target copies security markings from packets + to connections, and restores security markings from connections + to packets (if the packets are not already marked). This would + normally be used in conjunction with the SECMARK target. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_COMMENT tristate '"comment" match support' depends on NETFILTER_XTABLES @@ -329,6 +361,16 @@ config NETFILTER_XT_MATCH_PKTTYPE To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_QUOTA + tristate '"quota" match support' + depends on NETFILTER_XTABLES + help + This option adds a `quota' match, which allows to match on a + byte counter. + + If you want to compile it as a module, say M here and read + <file:Documentation/modules.txt>. If unsure, say `N'. + config NETFILTER_XT_MATCH_REALM tristate '"realm" match support' depends on NETFILTER_XTABLES @@ -365,6 +407,15 @@ config NETFILTER_XT_MATCH_STATE To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_STATISTIC + tristate '"statistic" match support' + depends on NETFILTER_XTABLES + help + This option adds a `statistic' match, which allows you to match + on packets periodically or randomly with a given percentage. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_STRING tristate '"string" match support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 95b7e416512d..6fa4b7580458 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -28,6 +28,8 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o @@ -44,9 +46,11 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o +obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 8455a32ea5c4..5d29d5e23624 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -10,7 +10,6 @@ * 15-Mar-2000: Added NF_REPEAT --RR. * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/netfilter.h> #include <net/protocol.h> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f9b83f91371a..8f2261965a68 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -29,7 +29,6 @@ * Derived from net/ipv4/netfilter/ip_conntrack_core.c */ -#include <linux/config.h> #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> @@ -990,6 +989,9 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, #ifdef CONFIG_NF_CONNTRACK_MARK conntrack->mark = exp->master->mark; #endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + conntrack->secmark = exp->master->secmark; +#endif nf_conntrack_get(&conntrack->master->ct_general); NF_CT_STAT_INC(expect_new); } else @@ -1396,6 +1398,12 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, write_lock_bh(&nf_conntrack_lock); + /* Only update if this is not a fixed timeout */ + if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) { + write_unlock_bh(&nf_conntrack_lock); + return; + } + /* If not in hash table, timer will not be active yet */ if (!nf_ct_is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index e38a4b5a3089..960972d225f9 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -15,7 +15,6 @@ * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c */ -#include <linux/config.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/netfilter.h> @@ -67,37 +66,48 @@ static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, char); static struct ftp_search { - enum ip_conntrack_dir dir; const char *pattern; size_t plen; char skip; char term; enum ip_ct_ftp_type ftptype; int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); -} search[] = { - { - IP_CT_DIR_ORIGINAL, - "PORT", sizeof("PORT") - 1, ' ', '\r', - IP_CT_FTP_PORT, - try_rfc959, +} search[IP_CT_DIR_MAX][2] = { + [IP_CT_DIR_ORIGINAL] = { + { + .pattern = "PORT", + .plen = sizeof("PORT") - 1, + .skip = ' ', + .term = '\r', + .ftptype = IP_CT_FTP_PORT, + .getnum = try_rfc959, + }, + { + .pattern = "EPRT", + .plen = sizeof("EPRT") - 1, + .skip = ' ', + .term = '\r', + .ftptype = IP_CT_FTP_EPRT, + .getnum = try_eprt, + }, }, - { - IP_CT_DIR_REPLY, - "227 ", sizeof("227 ") - 1, '(', ')', - IP_CT_FTP_PASV, - try_rfc959, - }, - { - IP_CT_DIR_ORIGINAL, - "EPRT", sizeof("EPRT") - 1, ' ', '\r', - IP_CT_FTP_EPRT, - try_eprt, - }, - { - IP_CT_DIR_REPLY, - "229 ", sizeof("229 ") - 1, '(', ')', - IP_CT_FTP_EPSV, - try_epsv_response, + [IP_CT_DIR_REPLY] = { + { + .pattern = "227 ", + .plen = sizeof("227 ") - 1, + .skip = '(', + .term = ')', + .ftptype = IP_CT_FTP_PASV, + .getnum = try_rfc959, + }, + { + .pattern = "229 ", + .plen = sizeof("229 ") - 1, + .skip = '(', + .term = ')', + .ftptype = IP_CT_FTP_EPSV, + .getnum = try_epsv_response, + }, }, }; @@ -492,17 +502,15 @@ static int help(struct sk_buff **pskb, memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all, sizeof(cmd.u3.all)); - for (i = 0; i < ARRAY_SIZE(search); i++) { - if (search[i].dir != dir) continue; - + for (i = 0; i < ARRAY_SIZE(search[dir]); i++) { found = find_pattern(fb_ptr, datalen, - search[i].pattern, - search[i].plen, - search[i].skip, - search[i].term, + search[dir][i].pattern, + search[dir][i].plen, + search[dir][i].skip, + search[dir][i].term, &matchoff, &matchlen, &cmd, - search[i].getnum); + search[dir][i].getnum); if (found) break; } if (found == -1) { @@ -512,7 +520,7 @@ static int help(struct sk_buff **pskb, this case. */ if (net_ratelimit()) printk("conntrack_ftp: partial %s %u+%u\n", - search[i].pattern, + search[dir][i].pattern, ntohl(th->seq), datalen); ret = NF_DROP; goto out; @@ -597,7 +605,7 @@ static int help(struct sk_buff **pskb, /* Now, NAT might want to mangle the packet, and register the * (possibly changed) expectation itself. */ if (nf_nat_ftp_hook) - ret = nf_nat_ftp_hook(pskb, ctinfo, search[i].ftptype, + ret = nf_nat_ftp_hook(pskb, ctinfo, search[dir][i].ftptype, matchoff, matchlen, exp, &seq); else { /* Can't expect this? Best to drop packet now. */ diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index 3fc58e454d4e..21e0bc91cf23 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -15,7 +15,6 @@ * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> */ -#include <linux/config.h> #include <linux/types.h> #include <linux/ip.h> #include <linux/netfilter.h> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index bd10eb944b65..af4845971f70 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include <linux/errno.h> #include <linux/netlink.h> #include <linux/spinlock.h> +#include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/netfilter.h> @@ -407,6 +408,8 @@ nfattr_failure: static int ctnetlink_done(struct netlink_callback *cb) { + if (cb->args[1]) + nf_ct_put((struct nf_conn *)cb->args[1]); DEBUGP("entered %s\n", __FUNCTION__); return 0; } @@ -416,10 +419,9 @@ static int ctnetlink_done(struct netlink_callback *cb) static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { - struct nf_conn *ct = NULL; + struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[1]; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; @@ -427,7 +429,9 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0], *id); read_lock_bh(&nf_conntrack_lock); - for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) { + for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { +restart: + last = (struct nf_conn *)cb->args[1]; list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { h = (struct nf_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) @@ -438,17 +442,30 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) * then dump everything. */ if (l3proto && L3PROTO(ct) != l3proto) continue; - if (ct->id <= *id) - continue; + if (last != NULL) { + if (ct == last) { + nf_ct_put(last); + cb->args[1] = 0; + last = NULL; + } else + continue; + } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, IPCTNL_MSG_CT_NEW, - 1, ct) < 0) + 1, ct) < 0) { + nf_conntrack_get(&ct->ct_general); + cb->args[1] = (unsigned long)ct; goto out; - *id = ct->id; + } + } + if (last != NULL) { + nf_ct_put(last); + cb->args[1] = 0; + goto restart; } } -out: +out: read_unlock_bh(&nf_conntrack_lock); DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); @@ -641,7 +658,7 @@ static const size_t cta_min_nat[CTA_NAT_MAX] = { }; static inline int -ctnetlink_parse_nat(struct nfattr *cda[], +ctnetlink_parse_nat(struct nfattr *nat, const struct nf_conn *ct, struct ip_nat_range *range) { struct nfattr *tb[CTA_NAT_MAX]; @@ -651,7 +668,7 @@ ctnetlink_parse_nat(struct nfattr *cda[], memset(range, 0, sizeof(*range)); - nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]); + nfattr_parse_nested(tb, CTA_NAT_MAX, nat); if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat)) return -EINVAL; @@ -866,39 +883,30 @@ ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) /* ASSURED bit can only be set */ return -EINVAL; - if (cda[CTA_NAT-1]) { + if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { #ifndef CONFIG_IP_NF_NAT_NEEDED return -EINVAL; #else - unsigned int hooknum; struct ip_nat_range range; - if (ctnetlink_parse_nat(cda, ct, &range) < 0) - return -EINVAL; - - DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", - NIPQUAD(range.min_ip), NIPQUAD(range.max_ip), - htons(range.min.all), htons(range.max.all)); - - /* This is tricky but it works. ip_nat_setup_info needs the - * hook number as parameter, so let's do the correct - * conversion and run away */ - if (status & IPS_SRC_NAT_DONE) - hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */ - else if (status & IPS_DST_NAT_DONE) - hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */ - else - return -EINVAL; /* Missing NAT flags */ - - DEBUGP("NAT status: %lu\n", - status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); - - if (ip_nat_initialized(ct, HOOK2MANIP(hooknum))) - return -EEXIST; - ip_nat_setup_info(ct, &range, hooknum); - - DEBUGP("NAT status after setup_info: %lu\n", - ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); + if (cda[CTA_NAT_DST-1]) { + if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct, + &range) < 0) + return -EINVAL; + if (ip_nat_initialized(ct, + HOOK2MANIP(NF_IP_PRE_ROUTING))) + return -EEXIST; + ip_nat_setup_info(ct, &range, hooknum); + } + if (cda[CTA_NAT_SRC-1]) { + if (ctnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct, + &range) < 0) + return -EINVAL; + if (ip_nat_initialized(ct, + HOOK2MANIP(NF_IP_POST_ROUTING))) + return -EEXIST; + ip_nat_setup_info(ct, &range, hooknum); + } #endif } @@ -1122,7 +1130,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, /* implicit 'else' */ /* we only allow nat config for new conntracks */ - if (cda[CTA_NAT-1]) { + if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) { err = -EINVAL; goto out_unlock; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 0c6da496cfa9..9bd8a7877fd5 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -28,6 +28,8 @@ #include <linux/sctp.h> #include <linux/string.h> #include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_protocol.h> @@ -259,7 +261,7 @@ static int do_basic_checks(struct nf_conn *conntrack, } DEBUGP("Basic checks passed\n"); - return 0; + return count == 0; } static int new_state(enum ip_conntrack_dir dir, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 69899f27d26a..af8adcba23a7 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -24,7 +24,6 @@ * version 2.2 */ -#include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/timer.h> @@ -828,8 +827,9 @@ static int tcp_error(struct sk_buff *skb, * and moreover root might send raw packets. */ /* FIXME: Source route IP option packets --RR */ - if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || - (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) && + if (nf_conntrack_checksum && + ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || + (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index d93edbfde9e3..ae07ebe3ab37 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -134,7 +134,8 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, * because the semantic of CHECKSUM_HW is different there * and moreover root might send raw packets. * FIXME: Source route IP option packets --RR */ - if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || + if (nf_conntrack_checksum && + ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { if (LOG_INVALID(IPPROTO_UDP)) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 408960c6a544..5fcab2ef231f 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -17,7 +17,6 @@ * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c */ -#include <linux/config.h> #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> @@ -213,6 +212,11 @@ static int ct_seq_show(struct seq_file *s, void *v) return -ENOSPC; #endif +#ifdef CONFIG_NF_CONNTRACK_SECMARK + if (seq_printf(s, "secmark=%u ", conntrack->secmark)) + return -ENOSPC; +#endif + if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) return -ENOSPC; @@ -455,6 +459,8 @@ extern unsigned int nf_ct_generic_timeout; static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; +int nf_conntrack_checksum = 1; + static struct ctl_table_header *nf_ct_sysctl_header; static ctl_table nf_ct_sysctl_table[] = { @@ -483,6 +489,14 @@ static ctl_table nf_ct_sysctl_table[] = { .proc_handler = &proc_dointvec, }, { + .ctl_name = NET_NF_CONNTRACK_CHECKSUM, + .procname = "nf_conntrack_checksum", + .data = &nf_conntrack_checksum, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, .procname = "nf_conntrack_tcp_timeout_syn_sent", .data = &nf_ct_tcp_timeout_syn_sent, @@ -851,6 +865,7 @@ EXPORT_SYMBOL(nf_ct_proto_put); EXPORT_SYMBOL(nf_ct_l3proto_find_get); EXPORT_SYMBOL(nf_ct_l3proto_put); EXPORT_SYMBOL(nf_ct_l3protos); +EXPORT_SYMBOL_GPL(nf_conntrack_checksum); EXPORT_SYMBOL(nf_conntrack_expect_alloc); EXPORT_SYMBOL(nf_conntrack_expect_put); EXPORT_SYMBOL(nf_conntrack_expect_related); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 6bdee2910617..86e392bfe833 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -1,7 +1,6 @@ #ifndef _NF_INTERNALS_H #define _NF_INTERNALS_H -#include <linux/config.h> #include <linux/list.h> #include <linux/skbuff.h> #include <linux/netdevice.h> diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 3e76bd0824a2..8901b3a07f7e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -1,4 +1,3 @@ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ee8f70889f47..bb6fcee452ca 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -1,4 +1,3 @@ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 0a63d7dac7be..c2e44e90e437 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -1,4 +1,3 @@ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b88e82a1a987..52fdfa2686c9 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -14,7 +14,6 @@ * of the GNU General Public License, incorporated herein by reference. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/socket.h> @@ -229,7 +228,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, NFNL_SUBSYS_ID(nlh->nlmsg_type), NFNL_MSG_TYPE(nlh->nlmsg_type)); - if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) { + if (security_netlink_recv(skb, CAP_NET_ADMIN)) { DEBUGP("missing CAP_NET_ADMIN\n"); *errp = -EPERM; return -1; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 86a4ac33de34..49ef41e34c48 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -680,11 +680,19 @@ dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) if (entinf->indev) if (entinf->indev->ifindex == ifindex) return 1; - if (entinf->outdev) if (entinf->outdev->ifindex == ifindex) return 1; - +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + if (entry->skb->nf_bridge->physindev && + entry->skb->nf_bridge->physindev->ifindex == ifindex) + return 1; + if (entry->skb->nf_bridge->physoutdev && + entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + return 1; + } +#endif return 0; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 99293c63ff73..174e8f970095 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -13,7 +13,6 @@ * */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/socket.h> #include <linux/net.h> diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c new file mode 100644 index 000000000000..8c011e020769 --- /dev/null +++ b/net/netfilter/xt_CONNSECMARK.c @@ -0,0 +1,155 @@ +/* + * This module is used to copy security markings from packets + * to connections, and restore security markings from connections + * back to packets. This would normally be performed in conjunction + * with the SECMARK target and state match. + * + * Based somewhat on CONNMARK: + * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> + * by Henrik Nordstrom <hno@marasystems.com> + * + * (C) 2006 Red Hat, Inc., James Morris <jmorris@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_CONNSECMARK.h> +#include <net/netfilter/nf_conntrack_compat.h> + +#define PFX "CONNSECMARK: " + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris@redhat.com>"); +MODULE_DESCRIPTION("ip[6]tables CONNSECMARK module"); +MODULE_ALIAS("ipt_CONNSECMARK"); +MODULE_ALIAS("ip6t_CONNSECMARK"); + +/* + * If the packet has a security mark and the connection does not, copy + * the security mark from the packet to the connection. + */ +static void secmark_save(struct sk_buff *skb) +{ + if (skb->secmark) { + u32 *connsecmark; + enum ip_conntrack_info ctinfo; + + connsecmark = nf_ct_get_secmark(skb, &ctinfo); + if (connsecmark && !*connsecmark) + if (*connsecmark != skb->secmark) + *connsecmark = skb->secmark; + } +} + +/* + * If packet has no security mark, and the connection does, restore the + * security mark from the connection to the packet. + */ +static void secmark_restore(struct sk_buff *skb) +{ + if (!skb->secmark) { + u32 *connsecmark; + enum ip_conntrack_info ctinfo; + + connsecmark = nf_ct_get_secmark(skb, &ctinfo); + if (connsecmark && *connsecmark) + if (skb->secmark != *connsecmark) + skb->secmark = *connsecmark; + } +} + +static unsigned int target(struct sk_buff **pskb, const struct net_device *in, + const struct net_device *out, unsigned int hooknum, + const struct xt_target *target, + const void *targinfo, void *userinfo) +{ + struct sk_buff *skb = *pskb; + const struct xt_connsecmark_target_info *info = targinfo; + + switch (info->mode) { + case CONNSECMARK_SAVE: + secmark_save(skb); + break; + + case CONNSECMARK_RESTORE: + secmark_restore(skb); + break; + + default: + BUG(); + } + + return XT_CONTINUE; +} + +static int checkentry(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) +{ + struct xt_connsecmark_target_info *info = targinfo; + + switch (info->mode) { + case CONNSECMARK_SAVE: + case CONNSECMARK_RESTORE: + break; + + default: + printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode); + return 0; + } + + return 1; +} + +static struct xt_target ipt_connsecmark_reg = { + .name = "CONNSECMARK", + .target = target, + .targetsize = sizeof(struct xt_connsecmark_target_info), + .table = "mangle", + .checkentry = checkentry, + .me = THIS_MODULE, + .family = AF_INET, + .revision = 0, +}; + +static struct xt_target ip6t_connsecmark_reg = { + .name = "CONNSECMARK", + .target = target, + .targetsize = sizeof(struct xt_connsecmark_target_info), + .table = "mangle", + .checkentry = checkentry, + .me = THIS_MODULE, + .family = AF_INET6, + .revision = 0, +}; + +static int __init xt_connsecmark_init(void) +{ + int err; + + need_conntrack(); + + err = xt_register_target(&ipt_connsecmark_reg); + if (err) + return err; + + err = xt_register_target(&ip6t_connsecmark_reg); + if (err) + xt_unregister_target(&ipt_connsecmark_reg); + + return err; +} + +static void __exit xt_connsecmark_fini(void) +{ + xt_unregister_target(&ip6t_connsecmark_reg); + xt_unregister_target(&ipt_connsecmark_reg); +} + +module_init(xt_connsecmark_init); +module_exit(xt_connsecmark_fini); diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c new file mode 100644 index 000000000000..c2ce9c4011cc --- /dev/null +++ b/net/netfilter/xt_SECMARK.c @@ -0,0 +1,156 @@ +/* + * Module for modifying the secmark field of the skb, for use by + * security subsystems. + * + * Based on the nfmark match by: + * (C) 1999-2001 Marc Boucher <marc@mbsi.ca> + * + * (C) 2006 Red Hat, Inc., James Morris <jmorris@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/selinux.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_SECMARK.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris@redhat.com>"); +MODULE_DESCRIPTION("ip[6]tables SECMARK modification module"); +MODULE_ALIAS("ipt_SECMARK"); +MODULE_ALIAS("ip6t_SECMARK"); + +#define PFX "SECMARK: " + +static u8 mode; + +static unsigned int target(struct sk_buff **pskb, const struct net_device *in, + const struct net_device *out, unsigned int hooknum, + const struct xt_target *target, + const void *targinfo, void *userinfo) +{ + u32 secmark = 0; + const struct xt_secmark_target_info *info = targinfo; + + BUG_ON(info->mode != mode); + + switch (mode) { + case SECMARK_MODE_SEL: + secmark = info->u.sel.selsid; + break; + + default: + BUG(); + } + + if ((*pskb)->secmark != secmark) + (*pskb)->secmark = secmark; + + return XT_CONTINUE; +} + +static int checkentry_selinux(struct xt_secmark_target_info *info) +{ + int err; + struct xt_secmark_target_selinux_info *sel = &info->u.sel; + + err = selinux_string_to_sid(sel->selctx, &sel->selsid); + if (err) { + if (err == -EINVAL) + printk(KERN_INFO PFX "invalid SELinux context \'%s\'\n", + sel->selctx); + return 0; + } + + if (!sel->selsid) { + printk(KERN_INFO PFX "unable to map SELinux context \'%s\'\n", + sel->selctx); + return 0; + } + + err = selinux_relabel_packet_permission(sel->selsid); + if (err) { + printk(KERN_INFO PFX "unable to obtain relabeling permission\n"); + return 0; + } + + return 1; +} + +static int checkentry(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) +{ + struct xt_secmark_target_info *info = targinfo; + + if (mode && mode != info->mode) { + printk(KERN_INFO PFX "mode already set to %hu cannot mix with " + "rules for mode %hu\n", mode, info->mode); + return 0; + } + + switch (info->mode) { + case SECMARK_MODE_SEL: + if (!checkentry_selinux(info)) + return 0; + break; + + default: + printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode); + return 0; + } + + if (!mode) + mode = info->mode; + return 1; +} + +static struct xt_target ipt_secmark_reg = { + .name = "SECMARK", + .target = target, + .targetsize = sizeof(struct xt_secmark_target_info), + .table = "mangle", + .checkentry = checkentry, + .me = THIS_MODULE, + .family = AF_INET, + .revision = 0, +}; + +static struct xt_target ip6t_secmark_reg = { + .name = "SECMARK", + .target = target, + .targetsize = sizeof(struct xt_secmark_target_info), + .table = "mangle", + .checkentry = checkentry, + .me = THIS_MODULE, + .family = AF_INET6, + .revision = 0, +}; + +static int __init xt_secmark_init(void) +{ + int err; + + err = xt_register_target(&ipt_secmark_reg); + if (err) + return err; + + err = xt_register_target(&ip6t_secmark_reg); + if (err) + xt_unregister_target(&ipt_secmark_reg); + + return err; +} + +static void __exit xt_secmark_fini(void) +{ + xt_unregister_target(&ip6t_secmark_reg); + xt_unregister_target(&ipt_secmark_reg); +} + +module_init(xt_secmark_init); +module_exit(xt_secmark_fini); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index dc26a27cbcaf..56324c8aff0a 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -58,7 +58,7 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { - struct xt_connmark_info *cm = (struct xt_connmark_info *)matchinfo; + struct xt_connmark_info *cm = matchinfo; if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { printk(KERN_WARNING "connmark: only support 32bit mark\n"); diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index dfb10b648e57..2e2f825dad4c 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -101,8 +101,7 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - const struct xt_dccp_info *info = - (const struct xt_dccp_info *)matchinfo; + const struct xt_dccp_info *info = matchinfo; struct dccp_hdr _dh, *dh; if (offset) diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 8b385a34886d..876bc5797738 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -42,7 +42,7 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { - struct xt_mark_info *minfo = (struct xt_mark_info *) matchinfo; + const struct xt_mark_info *minfo = matchinfo; if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { printk(KERN_WARNING "mark: only supports 32bit mark\n"); diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index b56cd2baaac2..1ff0a25396e7 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -1,4 +1,4 @@ -/* Kernel module to match one of a list of TCP/UDP ports: ports are in +/* Kernel module to match one of a list of TCP/UDP/SCTP/DCCP ports: ports are in the same place so we can treat them as equal. */ /* (C) 1999-2001 Paul `Rusty' Russell @@ -160,8 +160,9 @@ check(u_int16_t proto, u_int8_t match_flags, u_int8_t count) { - /* Must specify proto == TCP/UDP, no unknown flags or bad count */ - return (proto == IPPROTO_TCP || proto == IPPROTO_UDP) + /* Must specify supported protocol, no unknown flags or bad count */ + return (proto == IPPROTO_TCP || proto == IPPROTO_UDP + || proto == IPPROTO_SCTP || proto == IPPROTO_DCCP) && !(ip_invflags & XT_INV_PROTO) && (match_flags == XT_MULTIPORT_SOURCE || match_flags == XT_MULTIPORT_DESTINATION diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index a3aa62fbda6f..ba1ca03abad3 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -8,7 +8,6 @@ */ #include <linux/kernel.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/init.h> diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c new file mode 100644 index 000000000000..4cdba7469dc4 --- /dev/null +++ b/net/netfilter/xt_quota.c @@ -0,0 +1,96 @@ +/* + * netfilter module to enforce network quotas + * + * Sam Johnston <samj@samj.net> + */ +#include <linux/skbuff.h> +#include <linux/spinlock.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_quota.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); + +static DEFINE_SPINLOCK(quota_lock); + +static int +match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, int *hotdrop) +{ + struct xt_quota_info *q = ((struct xt_quota_info *)matchinfo)->master; + int ret = q->flags & XT_QUOTA_INVERT ? 1 : 0; + + spin_lock_bh("a_lock); + if (q->quota >= skb->len) { + q->quota -= skb->len; + ret ^= 1; + } else { + /* we do not allow even small packets from now on */ + q->quota = 0; + } + spin_unlock_bh("a_lock); + + return ret; +} + +static int +checkentry(const char *tablename, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) +{ + struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; + + if (q->flags & ~XT_QUOTA_MASK) + return 0; + /* For SMP, we only want to use one set of counters. */ + q->master = q; + return 1; +} + +static struct xt_match quota_match = { + .name = "quota", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_quota_info), + .checkentry = checkentry, + .me = THIS_MODULE +}; + +static struct xt_match quota_match6 = { + .name = "quota", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct xt_quota_info), + .checkentry = checkentry, + .me = THIS_MODULE +}; + +static int __init xt_quota_init(void) +{ + int ret; + + ret = xt_register_match("a_match); + if (ret) + goto err1; + ret = xt_register_match("a_match6); + if (ret) + goto err2; + return ret; + +err2: + xt_unregister_match("a_match); +err1: + return ret; +} + +static void __exit xt_quota_fini(void) +{ + xt_unregister_match("a_match6); + xt_unregister_match("a_match); +} + +module_init(xt_quota_init); +module_exit(xt_quota_fini); diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 34bd87259a09..843383e01d41 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -62,7 +62,7 @@ match_packet(const struct sk_buff *skb, do { sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); - if (sch == NULL) { + if (sch == NULL || sch->length == 0) { duprintf("Dropping invalid SCTP packet.\n"); *hotdrop = 1; return 0; @@ -129,11 +129,9 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - const struct xt_sctp_info *info; + const struct xt_sctp_info *info = matchinfo; sctp_sctphdr_t _sh, *sh; - info = (const struct xt_sctp_info *)matchinfo; - if (offset) { duprintf("Dropping non-first fragment.. FIXME\n"); return 0; @@ -153,7 +151,7 @@ match(const struct sk_buff *skb, && SCCHECK(((ntohs(sh->dest) >= info->dpts[0]) && (ntohs(sh->dest) <= info->dpts[1])), XT_SCTP_DEST_PORTS, info->flags, info->invflags) - && SCCHECK(match_packet(skb, protoff, + && SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t), info->chunkmap, info->chunk_match_type, info->flag_info, info->flag_count, hotdrop), diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c new file mode 100644 index 000000000000..de1037f58596 --- /dev/null +++ b/net/netfilter/xt_statistic.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on ipt_random and ipt_nth by Fabrice MARIE <fabrice@netfilter.org>. + */ + +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#include <linux/net.h> + +#include <linux/netfilter/xt_statistic.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("xtables statistical match module"); +MODULE_ALIAS("ipt_statistic"); +MODULE_ALIAS("ip6t_statistic"); + +static DEFINE_SPINLOCK(nth_lock); + +static int +match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, int *hotdrop) +{ + struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; + int ret = info->flags & XT_STATISTIC_INVERT ? 1 : 0; + + switch (info->mode) { + case XT_STATISTIC_MODE_RANDOM: + if ((net_random() & 0x7FFFFFFF) < info->u.random.probability) + ret ^= 1; + break; + case XT_STATISTIC_MODE_NTH: + info = info->master; + spin_lock_bh(&nth_lock); + if (info->u.nth.count++ == info->u.nth.every) { + info->u.nth.count = 0; + ret ^= 1; + } + spin_unlock_bh(&nth_lock); + break; + } + + return ret; +} + +static int +checkentry(const char *tablename, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) +{ + struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; + + if (info->mode > XT_STATISTIC_MODE_MAX || + info->flags & ~XT_STATISTIC_MASK) + return 0; + info->master = info; + return 1; +} + +static struct xt_match statistic_match = { + .name = "statistic", + .match = match, + .matchsize = sizeof(struct xt_statistic_info), + .checkentry = checkentry, + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match statistic_match6 = { + .name = "statistic", + .match = match, + .matchsize = sizeof(struct xt_statistic_info), + .checkentry = checkentry, + .family = AF_INET6, + .me = THIS_MODULE, +}; + +static int __init xt_statistic_init(void) +{ + int ret; + + ret = xt_register_match(&statistic_match); + if (ret) + goto err1; + + ret = xt_register_match(&statistic_match6); + if (ret) + goto err2; + return ret; +err2: + xt_unregister_match(&statistic_match); +err1: + return ret; +} + +static void __exit xt_statistic_fini(void) +{ + xt_unregister_match(&statistic_match6); + xt_unregister_match(&statistic_match); +} + +module_init(xt_statistic_init); +module_exit(xt_statistic_fini); diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 79d9ea6964ba..0ebb6ac2c8c7 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -30,8 +30,8 @@ static int match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { + const struct xt_string_info *conf = matchinfo; struct ts_state state; - struct xt_string_info *conf = (struct xt_string_info *) matchinfo; memset(&state, 0, sizeof(struct ts_state)); diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 1b61dac9c873..a9a63aa68936 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -260,7 +260,7 @@ static int __init xt_tcpudp_init(void) return ret; out_unreg_udp: - xt_unregister_match(&tcp_matchstruct); + xt_unregister_match(&udp_matchstruct); out_unreg_tcp6: xt_unregister_match(&tcp6_matchstruct); out_unreg_tcp: diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3862e73d14d7..55c0adc8f115 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -21,7 +21,6 @@ * mandatory if CONFIG_NET=y these days */ -#include <linux/config.h> #include <linux/module.h> #include <linux/capability.h> @@ -157,7 +156,7 @@ static void netlink_sock_destruct(struct sock *sk) static void netlink_table_grab(void) { - write_lock_bh(&nl_table_lock); + write_lock_irq(&nl_table_lock); if (atomic_read(&nl_table_users)) { DECLARE_WAITQUEUE(wait, current); @@ -167,9 +166,9 @@ static void netlink_table_grab(void) set_current_state(TASK_UNINTERRUPTIBLE); if (atomic_read(&nl_table_users) == 0) break; - write_unlock_bh(&nl_table_lock); + write_unlock_irq(&nl_table_lock); schedule(); - write_lock_bh(&nl_table_lock); + write_lock_irq(&nl_table_lock); } __set_current_state(TASK_RUNNING); @@ -179,7 +178,7 @@ static void netlink_table_grab(void) static __inline__ void netlink_table_ungrab(void) { - write_unlock_bh(&nl_table_lock); + write_unlock_irq(&nl_table_lock); wake_up(&nl_table_wait); } diff --git a/net/netlink/attr.c b/net/netlink/attr.c index fffef4ab276f..dddbd15135a8 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -5,7 +5,6 @@ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/errno.h> diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f329b72578f5..a298f77cc3e3 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -5,7 +5,6 @@ * Thomas Graf <tgraf@suug.ch> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -320,7 +319,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb)) { + if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb, CAP_NET_ADMIN)) { err = -EPERM; goto errout; } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3669cb953e6e..1d50f801f181 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -8,7 +8,6 @@ * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk) */ -#include <linux/config.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/capability.h> @@ -67,6 +66,14 @@ static DEFINE_SPINLOCK(nr_list_lock); static const struct proto_ops nr_proto_ops; /* + * NETROM network devices are virtual network devices encapsulating NETROM + * frames into AX.25 which will be sent through an AX.25 device, so form a + * special "super class" of normal net devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key nr_netdev_xmit_lock_key; + +/* * Socket removal during an interrupt is now safe. */ static void nr_remove_socket(struct sock *sk) @@ -801,7 +808,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) /* Now attach up the new socket */ kfree_skb(skb); - sk->sk_ack_backlog--; + sk_acceptq_removed(sk); newsock->sk = newsk; out: @@ -986,19 +993,19 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) nr_make->vr = 0; nr_make->vl = 0; nr_make->state = NR_STATE_3; - sk->sk_ack_backlog++; - - nr_insert_socket(make); - + sk_acceptq_added(sk); skb_queue_head(&sk->sk_receive_queue, skb); - nr_start_heartbeat(make); - nr_start_idletimer(make); - if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, skb->len); bh_unlock_sock(sk); + + nr_insert_socket(make); + + nr_start_heartbeat(make); + nr_start_idletimer(make); + return 1; } @@ -1383,14 +1390,12 @@ static int __init nr_proto_init(void) return -1; } - dev_nr = kmalloc(nr_ndevs * sizeof(struct net_device *), GFP_KERNEL); + dev_nr = kzalloc(nr_ndevs * sizeof(struct net_device *), GFP_KERNEL); if (dev_nr == NULL) { printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n"); return -1; } - memset(dev_nr, 0x00, nr_ndevs * sizeof(struct net_device *)); - for (i = 0; i < nr_ndevs; i++) { char name[IFNAMSIZ]; struct net_device *dev; @@ -1408,6 +1413,7 @@ static int __init nr_proto_init(void) free_netdev(dev); goto fail; } + lockdep_set_class(&dev->_xmit_lock, &nr_netdev_xmit_lock_key); dev_nr[i] = dev; } diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 621e5586ab03..9b8eb54971ab 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -6,7 +6,6 @@ * * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ -#include <linux/config.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/kernel.h> diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index b3b9097c87c7..c11737f472d6 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -725,15 +725,17 @@ void nr_link_failed(ax25_cb *ax25, int reason) struct nr_node *nr_node = NULL; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(s, node, &nr_neigh_list) + nr_neigh_for_each(s, node, &nr_neigh_list) { if (s->ax25 == ax25) { nr_neigh_hold(s); nr_neigh = s; break; } + } spin_unlock_bh(&nr_neigh_list_lock); - if (nr_neigh == NULL) return; + if (nr_neigh == NULL) + return; nr_neigh->ax25 = NULL; ax25_cb_put(ax25); @@ -743,11 +745,13 @@ void nr_link_failed(ax25_cb *ax25, int reason) return; } spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, node, &nr_node_list) + nr_node_for_each(nr_node, node, &nr_node_list) { nr_node_lock(nr_node); - if (nr_node->which < nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh) + if (nr_node->which < nr_node->count && + nr_node->routes[nr_node->which].neighbour == nr_neigh) nr_node->which++; nr_node_unlock(nr_node); + } spin_unlock_bh(&nr_node_list_lock); nr_neigh_put(nr_neigh); } diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 75b72d389ba9..ddba1c144260 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -138,8 +138,8 @@ static void nr_heartbeat_expiry(unsigned long param) if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { sock_hold(sk); - nr_destroy_socket(sk); bh_unlock_sock(sk); + nr_destroy_socket(sk); sock_put(sk); return; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9db7dbdb16e6..f9cef3671593 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -49,7 +49,6 @@ * */ -#include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/mm.h> diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 55564efccf11..08a542855654 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -10,7 +10,6 @@ * Copyright (C) Tomi Manninen OH2BNS (oh2bns@sral.fi) */ -#include <linux/config.h> #include <linux/capability.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -68,6 +67,14 @@ static struct proto_ops rose_proto_ops; ax25_address rose_callsign; /* + * ROSE network devices are virtual network devices encapsulating ROSE + * frames into AX.25 which will be sent through an AX.25 device, so form a + * special "super class" of normal net devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key rose_netdev_xmit_lock_key; + +/* * Convert a ROSE address into text. */ const char *rose2asc(const rose_address *addr) @@ -753,7 +760,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le rose_insert_socket(sk); /* Finish the bind */ } - +rose_try_next_neigh: rose->dest_addr = addr->srose_addr; rose->dest_call = addr->srose_call; rose->rand = ((long)rose & 0xFFFF) + rose->lci; @@ -811,6 +818,11 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le } if (sk->sk_state != TCP_ESTABLISHED) { + /* Try next neighbour */ + rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic); + if (rose->neighbour) + goto rose_try_next_neigh; + /* No more neighbour */ sock->state = SS_UNCONNECTED; return sock_error(sk); /* Always set at this point */ } @@ -1486,14 +1498,13 @@ static int __init rose_proto_init(void) rose_callsign = null_ax25_address; - dev_rose = kmalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL); + dev_rose = kzalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL); if (dev_rose == NULL) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n"); rc = -ENOMEM; goto out_proto_unregister; } - memset(dev_rose, 0x00, rose_ndevs * sizeof(struct net_device*)); for (i = 0; i < rose_ndevs; i++) { struct net_device *dev; char name[IFNAMSIZ]; @@ -1512,6 +1523,7 @@ static int __init rose_proto_init(void) free_netdev(dev); goto fail; } + lockdep_set_class(&dev->_xmit_lock, &rose_netdev_xmit_lock_key); dev_rose[i] = dev; } diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 2a1bf8e119e5..7c279e2659ec 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -6,7 +6,6 @@ * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) */ -#include <linux/config.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/kernel.h> @@ -60,6 +59,7 @@ static int rose_rebuild_header(struct sk_buff *skb) struct net_device_stats *stats = netdev_priv(dev); unsigned char *bp = (unsigned char *)skb->data; struct sk_buff *skbn; + unsigned int len; #ifdef CONFIG_INET if (arp_find(bp + 7, skb)) { @@ -76,6 +76,8 @@ static int rose_rebuild_header(struct sk_buff *skb) kfree_skb(skb); + len = skbn->len; + if (!rose_route_frame(skbn, NULL)) { kfree_skb(skbn); stats->tx_errors++; @@ -83,7 +85,7 @@ static int rose_rebuild_header(struct sk_buff *skb) } stats->tx_packets++; - stats->tx_bytes += skbn->len; + stats->tx_bytes += len; #endif return 1; } diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c index c4aeb7d40266..d07122b57e0d 100644 --- a/net/rxrpc/call.c +++ b/net/rxrpc/call.c @@ -1098,8 +1098,7 @@ static void rxrpc_call_receive_data_packet(struct rxrpc_call *call, call->app_ready_seq = pmsg->seq; call->app_ready_qty += pmsg->dsize; - list_del_init(&pmsg->link); - list_add_tail(&pmsg->link, &call->app_readyq); + list_move_tail(&pmsg->link, &call->app_readyq); } /* see if we've got the last packet yet */ diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c index 0e0a4553499f..573b572f8f91 100644 --- a/net/rxrpc/connection.c +++ b/net/rxrpc/connection.c @@ -402,8 +402,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn) /* move to graveyard queue */ _debug("burying connection: {%08x}", ntohl(conn->conn_id)); - list_del(&conn->link); - list_add_tail(&conn->link, &peer->conn_graveyard); + list_move_tail(&conn->link, &peer->conn_graveyard); rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ); diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c index 1aadd026d354..cea4eb5e2497 100644 --- a/net/rxrpc/krxsecd.c +++ b/net/rxrpc/krxsecd.c @@ -160,8 +160,7 @@ void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans) list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) { msg = list_entry(_p, struct rxrpc_message, link); if (msg->trans == trans) { - list_del(&msg->link); - list_add_tail(&msg->link, &tmp); + list_move_tail(&msg->link, &tmp); atomic_dec(&rxrpc_krxsecd_qcount); } } diff --git a/net/rxrpc/rxrpc_syms.c b/net/rxrpc/rxrpc_syms.c index 56adf16fed0c..9896fd87a4d4 100644 --- a/net/rxrpc/rxrpc_syms.c +++ b/net/rxrpc/rxrpc_syms.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <rxrpc/transport.h> diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index fbf98729c748..6374df7e77d1 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/module.h> diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 13eeee582886..8298ea9ffe19 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -305,7 +305,7 @@ config NET_CLS_U32 tristate "Universal 32bit comparisons w/ hashing (U32)" select NET_CLS ---help--- - Say Y here to be able to classify packetes using a universal + Say Y here to be able to classify packets using a universal 32bit pieces based comparison scheme. To compile this code as a module, choose M here: the @@ -485,7 +485,7 @@ config NET_ACT_IPT tristate "IPtables targets" depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES ---help--- - Say Y here to be able to invoke iptables targets after succesful + Say Y here to be able to invoke iptables targets after successful classification. To compile this code as a module, choose M here: the @@ -537,8 +537,8 @@ config NET_ESTIMATOR ---help--- Say Y here to allow using rate estimators to estimate the current rate-of-flow for network devices, queues, etc. This module is - automaticaly selected if needed but can be selected manually for - statstical purposes. + automatically selected if needed but can be selected manually for + statistical purposes. endif # NET_SCHED diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2ffa11c6e8de..9affeeedf107 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -14,7 +14,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> @@ -251,15 +250,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) RTA_PUT(skb, a->order, 0, NULL); err = tcf_action_dump_1(skb, a, bind, ref); if (err < 0) - goto rtattr_failure; + goto errout; r->rta_len = skb->tail - (u8*)r; } return 0; rtattr_failure: + err = -EINVAL; +errout: skb_trim(skb, b - skb->data); - return -err; + return err; } struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, @@ -306,6 +307,7 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, goto err_mod; } #endif + *err = -ENOENT; goto err_out; } @@ -600,8 +602,8 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) return err; rtattr_failure: - module_put(a->ops->owner); nlmsg_failure: + module_put(a->ops->owner); err_out: kfree_skb(skb); kfree(a); @@ -777,7 +779,7 @@ replay: return ret; } -static char * +static struct rtattr * find_dump_kind(struct nlmsghdr *n) { struct rtattr *tb1, *tb2[TCA_ACT_MAX+1]; @@ -805,7 +807,7 @@ find_dump_kind(struct nlmsghdr *n) return NULL; kind = tb2[TCA_ACT_KIND-1]; - return (char *) RTA_DATA(kind); + return kind; } static int @@ -818,16 +820,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tc_action a; int ret = 0; struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); - char *kind = find_dump_kind(cb->nlh); + struct rtattr *kind = find_dump_kind(cb->nlh); if (kind == NULL) { printk("tc_dump_action: action bad kind\n"); return 0; } - a_o = tc_lookup_action_n(kind); + a_o = tc_lookup_action(kind); if (a_o == NULL) { - printk("failed to find %s\n", kind); return 0; } @@ -835,7 +836,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) a.ops = a_o; if (a_o->walk == NULL) { - printk("tc_dump_action: %s !capable of dumping table\n", kind); + printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind); goto rtattr_failure; } @@ -883,8 +884,6 @@ static int __init tc_action_init(void) link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action; } - printk("TC classifier action (bugs to netdev@vger.kernel.org cc " - "hadi@cyberus.ca)\n"); return 0; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index a1e68f78dcc2..e75a147ad60f 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -13,7 +13,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 37640c6fc014..d799e01248c4 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -14,7 +14,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 4fcccbd50885..fc562047ecc5 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -15,7 +15,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 1742a68e0122..58b3a8652042 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -12,7 +12,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <asm/bitops.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 24c348fa8922..47e00bd9625e 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -13,7 +13,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e5f2e1f431e2..17105c82537f 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -10,7 +10,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b4d89fbb3782..7e14f14058e9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -17,7 +17,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index dfb300bb6baa..61507f006b11 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -9,7 +9,6 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 75470486e405..d41de91fc4f6 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -18,7 +18,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 520ff716dab2..c2e71900f7bd 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -10,7 +10,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 572f06be3b02..ba8741971629 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -65,7 +65,6 @@ Well, as result, despite its simplicity, we get a pretty powerful classification engine. */ -#include <linux/config.h> struct rsvp_head { diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 9f921174c8ab..7870e7bb0bac 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -4,7 +4,6 @@ * Written 1998,1999 by Werner Almesberger, EPFL ICA */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 78e052591fa9..d712edcd1bcf 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -33,7 +33,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index bf1f00f8b1bf..8ed93c39b4ea 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -9,7 +9,6 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 700844d49d79..698372954f4d 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -58,7 +58,6 @@ * only available if that subsytem is enabled in the kernel. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 71ea926a9f09..cc80babfd79f 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -9,7 +9,6 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 77beabc91fa3..aa17d8f7c4c8 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -9,7 +9,6 @@ * Authors: Thomas Graf <tgraf@suug.ch> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 34e7e51e601e..e3ddfce0ac8d 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c @@ -12,7 +12,6 @@ * Based on net/sched/cls_u32.c */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 5cb956b721e8..2405a86093a2 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -81,7 +81,6 @@ * open up a beer to watch the compilation going. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 31570b9a6e9a..c7844bacbbcb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -15,7 +15,6 @@ * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ac7cb60d1e25..dbf44da0912f 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -3,7 +3,6 @@ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/string.h> diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index 81f0b8346d17..cb0c456aa349 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -11,7 +11,6 @@ * Note: Quantum tunneling is not supported. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6cd81708bf71..80b7f6a8d008 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -10,7 +10,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index f6320ca70493..11c8a2119b96 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -3,7 +3,6 @@ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 033083bf0e74..c2689f4ba8de 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -9,7 +9,6 @@ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 138ea92ed268..d735f51686a1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -14,7 +14,6 @@ #include <asm/uaccess.h> #include <asm/system.h> #include <linux/bitops.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -72,9 +71,9 @@ void qdisc_unlock_tree(struct net_device *dev) dev->queue_lock serializes queue accesses for this device AND dev->qdisc pointer itself. - dev->xmit_lock serializes accesses to device driver. + netif_tx_lock serializes accesses to device driver. - dev->queue_lock and dev->xmit_lock are mutually exclusive, + dev->queue_lock and netif_tx_lock are mutually exclusive, if one is grabbed, another must be free. */ @@ -90,14 +89,17 @@ void qdisc_unlock_tree(struct net_device *dev) NOTE: Called under dev->queue_lock with locally disabled BH. */ -int qdisc_restart(struct net_device *dev) +static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; struct sk_buff *skb; /* Dequeue packet */ - if ((skb = q->dequeue(q)) != NULL) { + if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { unsigned nolock = (dev->features & NETIF_F_LLTX); + + dev->gso_skb = NULL; + /* * When the driver has LLTX set it does its own locking * in start_xmit. No need to add additional overhead by @@ -108,7 +110,7 @@ int qdisc_restart(struct net_device *dev) * will be requeued. */ if (!nolock) { - if (!spin_trylock(&dev->xmit_lock)) { + if (!netif_tx_trylock(dev)) { collision: /* So, someone grabbed the driver. */ @@ -126,8 +128,6 @@ int qdisc_restart(struct net_device *dev) __get_cpu_var(netdev_rx_stat).cpu_collision++; goto requeue; } - /* Remember that the driver is grabbed by us. */ - dev->xmit_lock_owner = smp_processor_id(); } { @@ -136,14 +136,11 @@ int qdisc_restart(struct net_device *dev) if (!netif_queue_stopped(dev)) { int ret; - if (netdev_nit) - dev_queue_xmit_nit(skb, dev); - ret = dev->hard_start_xmit(skb, dev); + ret = dev_hard_start_xmit(skb, dev); if (ret == NETDEV_TX_OK) { if (!nolock) { - dev->xmit_lock_owner = -1; - spin_unlock(&dev->xmit_lock); + netif_tx_unlock(dev); } spin_lock(&dev->queue_lock); return -1; @@ -157,8 +154,7 @@ int qdisc_restart(struct net_device *dev) /* NETDEV_TX_BUSY - we need to requeue */ /* Release the driver */ if (!nolock) { - dev->xmit_lock_owner = -1; - spin_unlock(&dev->xmit_lock); + netif_tx_unlock(dev); } spin_lock(&dev->queue_lock); q = dev->qdisc; @@ -175,7 +171,10 @@ int qdisc_restart(struct net_device *dev) */ requeue: - q->ops->requeue(skb, q); + if (skb->next) + dev->gso_skb = skb; + else + q->ops->requeue(skb, q); netif_schedule(dev); return 1; } @@ -183,11 +182,23 @@ requeue: return q->q.qlen; } +void __qdisc_run(struct net_device *dev) +{ + if (unlikely(dev->qdisc == &noop_qdisc)) + goto out; + + while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev)) + /* NOTHING */; + +out: + clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); +} + static void dev_watchdog(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; - spin_lock(&dev->xmit_lock); + netif_tx_lock(dev); if (dev->qdisc != &noop_qdisc) { if (netif_device_present(dev) && netif_running(dev) && @@ -203,7 +214,7 @@ static void dev_watchdog(unsigned long arg) dev_hold(dev); } } - spin_unlock(&dev->xmit_lock); + netif_tx_unlock(dev); dev_put(dev); } @@ -227,17 +238,17 @@ void __netdev_watchdog_up(struct net_device *dev) static void dev_watchdog_up(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + netif_tx_lock_bh(dev); __netdev_watchdog_up(dev); - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); } static void dev_watchdog_down(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + netif_tx_lock_bh(dev); if (del_timer(&dev->watchdog_timer)) dev_put(dev); - spin_unlock_bh(&dev->xmit_lock); + netif_tx_unlock_bh(dev); } void netif_carrier_on(struct net_device *dev) @@ -579,10 +590,17 @@ void dev_deactivate(struct net_device *dev) dev_watchdog_down(dev); - while (test_bit(__LINK_STATE_SCHED, &dev->state)) + /* Wait for outstanding dev_queue_xmit calls. */ + synchronize_rcu(); + + /* Wait for outstanding qdisc_run calls. */ + while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) yield(); - spin_unlock_wait(&dev->xmit_lock); + if (dev->gso_skb) { + kfree_skb(dev->gso_skb); + dev->gso_skb = NULL; + } } void dev_init_scheduler(struct net_device *dev) @@ -624,6 +642,5 @@ EXPORT_SYMBOL(qdisc_create_dflt); EXPORT_SYMBOL(qdisc_alloc); EXPORT_SYMBOL(qdisc_destroy); EXPORT_SYMBOL(qdisc_reset); -EXPORT_SYMBOL(qdisc_restart); EXPORT_SYMBOL(qdisc_lock_tree); EXPORT_SYMBOL(qdisc_unlock_tree); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 29a2dd9f3029..0cafdd5feb1b 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -18,7 +18,6 @@ * For all the glorious comments look at include/net/red.h */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index f1c7bd29f2cd..6b1b4a981e88 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -50,7 +50,6 @@ */ #include <linux/kernel.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 3ec95df4a85e..cc5f339e6f91 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -27,7 +27,6 @@ * * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -197,7 +196,7 @@ struct htb_class struct qdisc_rate_table *rate; /* rate table of the class itself */ struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ long buffer,cbuffer; /* token bucket depth/rate */ - long mbuffer; /* max wait time */ + psched_tdiff_t mbuffer; /* max wait time */ long tokens,ctokens; /* current number of tokens */ psched_time_t t_c; /* checkpoint time */ }; @@ -1602,7 +1601,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* set class to be in HTB_CAN_SEND state */ cl->tokens = hopt->buffer; cl->ctokens = hopt->cbuffer; - cl->mbuffer = 60000000; /* 1min */ + cl->mbuffer = PSCHED_JIFFIE2US(HZ*60); /* 1min */ PSCHED_GET_TIME(cl->t_c); cl->cmode = HTB_CAN_SEND; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 8edc32a6ad2f..c3242f727d41 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -7,7 +7,6 @@ * Authors: Jamal Hadi Salim 1999 */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/skbuff.h> diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5a4a4d0ae502..c5bd8064e6d8 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -13,7 +13,6 @@ * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/types.h> diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3395ca7bcadf..a5fa03c0c19b 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -11,7 +11,6 @@ * Init -- EINVAL when opt undefined */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 2be563cba72b..d65cadddea69 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -14,7 +14,6 @@ * J Hadi Salim 980816: ECN support */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index e057768f68b4..d0d6e595a78c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -9,7 +9,6 @@ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index d8e03c74ca76..d9a5d298d755 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -12,7 +12,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 79b8ef34c6e4..4c16ad57a3e4 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -302,20 +302,17 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: - if (spin_trylock(&slave->xmit_lock)) { - slave->xmit_lock_owner = smp_processor_id(); + if (netif_tx_trylock(slave)) { if (!netif_queue_stopped(slave) && slave->hard_start_xmit(skb, slave) == 0) { - slave->xmit_lock_owner = -1; - spin_unlock(&slave->xmit_lock); + netif_tx_unlock(slave); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; master->stats.tx_bytes += len; return 0; } - slave->xmit_lock_owner = -1; - spin_unlock(&slave->xmit_lock); + netif_tx_unlock(slave); } if (netif_queue_stopped(dev)) busy = 1; diff --git a/net/sctp/input.c b/net/sctp/input.c index 1662f9cc869e..42b66e74bbb5 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -141,7 +141,8 @@ int sctp_rcv(struct sk_buff *skb) __skb_pull(skb, skb->h.raw - skb->data); if (skb->len < sizeof(struct sctphdr)) goto discard_it; - if (sctp_rcv_checksum(skb) < 0) + if ((skb->ip_summed != CHECKSUM_UNNECESSARY) && + (sctp_rcv_checksum(skb) < 0)) goto discard_it; skb_pull(skb, sizeof(struct sctphdr)); @@ -170,7 +171,8 @@ int sctp_rcv(struct sk_buff *skb) * IP broadcast addresses cannot be used in an SCTP transport * address." */ - if (!af->addr_valid(&src, NULL) || !af->addr_valid(&dest, NULL)) + if (!af->addr_valid(&src, NULL, skb) || + !af->addr_valid(&dest, NULL, skb)) goto discard_it; asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index c20d282fac06..8ef08070c8b6 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -523,7 +523,9 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) * Return 0 - If the address is a non-unicast or an illegal address. * Return 1 - If the address is a unicast. */ -static int sctp_v6_addr_valid(union sctp_addr *addr, struct sctp_sock *sp) +static int sctp_v6_addr_valid(union sctp_addr *addr, + struct sctp_sock *sp, + const struct sk_buff *skb) { int ret = ipv6_addr_type(&addr->v6.sin6_addr); @@ -537,7 +539,7 @@ static int sctp_v6_addr_valid(union sctp_addr *addr, struct sctp_sock *sp) if (sp && ipv6_only_sock(sctp_opt2sk(sp))) return 0; sctp_v6_map_v4(addr); - return sctp_get_af_specific(AF_INET)->addr_valid(addr, sp); + return sctp_get_af_specific(AF_INET)->addr_valid(addr, sp, skb); } /* Is this a non-unicast address */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 437cba7260a4..cdc5a3936766 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -295,14 +295,14 @@ int sctp_packet_transmit(struct sctp_packet *packet) struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; struct sctphdr *sh; - __u32 crc32; + __u32 crc32 = 0; struct sk_buff *nskb; struct sctp_chunk *chunk, *tmp; struct sock *sk; int err = 0; int padding; /* How much padding do we need? */ __u8 has_data = 0; - struct dst_entry *dst; + struct dst_entry *dst = tp->dst; SCTP_DEBUG_PRINTK("%s: packet:%p\n", __FUNCTION__, packet); @@ -327,6 +327,19 @@ int sctp_packet_transmit(struct sctp_packet *packet) */ skb_set_owner_w(nskb, sk); + /* The 'obsolete' field of dst is set to 2 when a dst is freed. */ + if (!dst || (dst->obsolete > 1)) { + dst_release(dst); + sctp_transport_route(tp, NULL, sctp_sk(sk)); + if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) { + sctp_assoc_sync_pmtu(asoc); + } + } + nskb->dst = dst_clone(tp->dst); + if (!nskb->dst) + goto no_route; + dst = nskb->dst; + /* Build the SCTP header. */ sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); sh->source = htons(packet->source_port); @@ -350,7 +363,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: Adler-32 is no longer applicable, as has been replaced * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. */ - crc32 = sctp_start_cksum((__u8 *)sh, sizeof(struct sctphdr)); + if (!(dst->dev->features & NETIF_F_NO_CSUM)) + crc32 = sctp_start_cksum((__u8 *)sh, sizeof(struct sctphdr)); /** * 6.10 Bundling @@ -402,9 +416,14 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (padding) memset(skb_put(chunk->skb, padding), 0, padding); - crc32 = sctp_update_copy_cksum(skb_put(nskb, chunk->skb->len), - chunk->skb->data, - chunk->skb->len, crc32); + if (dst->dev->features & NETIF_F_NO_CSUM) + memcpy(skb_put(nskb, chunk->skb->len), + chunk->skb->data, chunk->skb->len); + else + crc32 = sctp_update_copy_cksum(skb_put(nskb, + chunk->skb->len), + chunk->skb->data, + chunk->skb->len, crc32); SCTP_DEBUG_PRINTK("%s %p[%s] %s 0x%x, %s %d, %s %d, %s %d\n", "*** Chunk", chunk, @@ -427,7 +446,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) } /* Perform final transformation on checksum. */ - crc32 = sctp_end_cksum(crc32); + if (!(dst->dev->features & NETIF_F_NO_CSUM)) + crc32 = sctp_end_cksum(crc32); /* 3) Put the resultant value into the checksum field in the * common header, and leave the rest of the bits unchanged. @@ -477,20 +497,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) } } - dst = tp->dst; - /* The 'obsolete' field of dst is set to 2 when a dst is freed. */ - if (!dst || (dst->obsolete > 1)) { - dst_release(dst); - sctp_transport_route(tp, NULL, sctp_sk(sk)); - if (asoc->param_flags & SPP_PMTUD_ENABLE) { - sctp_assoc_sync_pmtu(asoc); - } - } - - nskb->dst = dst_clone(tp->dst); - if (!nskb->dst) - goto no_route; - SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", nskb->len); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index f148f9576dd2..e5faa351aaad 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1262,6 +1262,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, if (!tchunk->tsn_gap_acked && !tchunk->resent && tchunk->rtt_in_progress) { + tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; sctp_transport_update_rto(transport, rtt); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2088aa992b7a..816c033d7886 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -365,12 +365,18 @@ static int sctp_v4_is_any(const union sctp_addr *addr) * Return 0 - If the address is a non-unicast or an illegal address. * Return 1 - If the address is a unicast. */ -static int sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_sock *sp) +static int sctp_v4_addr_valid(union sctp_addr *addr, + struct sctp_sock *sp, + const struct sk_buff *skb) { /* Is this a non-unicast address or a unusable SCTP address? */ if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr)) return 0; + /* Is this a broadcast address? */ + if (skb && ((struct rtable *)skb->dst)->rt_flags & RTCF_BROADCAST) + return 0; + return 1; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 5e0de3c0eead..2a8773691695 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1402,14 +1402,14 @@ struct sctp_association *sctp_unpack_cookie( sg.length = bodysize; key = (char *)ep->secret_key[ep->current_key]; - memset(digest, 0x00, sizeof(digest)); + memset(digest, 0x00, SCTP_SIGNATURE_SIZE); sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg, 1, digest); if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { /* Try the previous key. */ key = (char *)ep->secret_key[ep->last_key]; - memset(digest, 0x00, sizeof(digest)); + memset(digest, 0x00, SCTP_SIGNATURE_SIZE); sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg, 1, digest); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8bc279219a72..9e58144f4851 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5293,10 +5293,18 @@ static int sctp_eat_data(const struct sctp_association *asoc, * seems a bit troublesome in that frag_point varies based on * PMTU. In cases, such as loopback, this might be a rather * large spill over. + * NOTE: If we have a full receive buffer here, we only renege if + * our receiver can still make progress without the tsn being + * received. We do this because in the event that the associations + * receive queue is empty we are filling a leading gap, and since + * reneging moves the gap to the end of the tsn stream, we are likely + * to stall again very shortly. Avoiding the renege when we fill a + * leading gap is a good heuristic for avoiding such steady state + * stalls. */ if (!asoc->rwnd || asoc->rwnd_over || (datalen > asoc->rwnd + asoc->frag_point) || - rcvbuf_over) { + (rcvbuf_over && (!skb_queue_len(&sk->sk_receive_queue)))) { /* If this is the next TSN, consider reneging to make * room. Note: Playing nice with a confused sender. A diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 174d4d35e951..0a2c71d0d8aa 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,7 +57,6 @@ * be incorporated into the next SCTP release. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/wait.h> @@ -172,7 +171,7 @@ static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, return -EINVAL; /* Is this a valid SCTP address? */ - if (!af->addr_valid(addr, sctp_sk(sk))) + if (!af->addr_valid(addr, sctp_sk(sk), NULL)) return -EINVAL; if (!sctp_sk(sk)->pf->send_verify(sctp_sk(sk), (addr))) @@ -2530,8 +2529,32 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, int o /* Set the values to the specific association */ if (asoc) { - if (assocparams.sasoc_asocmaxrxt != 0) + if (assocparams.sasoc_asocmaxrxt != 0) { + __u32 path_sum = 0; + int paths = 0; + struct list_head *pos; + struct sctp_transport *peer_addr; + + list_for_each(pos, &asoc->peer.transport_addr_list) { + peer_addr = list_entry(pos, + struct sctp_transport, + transports); + path_sum += peer_addr->pathmaxrxt; + paths++; + } + + /* Only validate asocmaxrxt if we have more then + * one path/transport. We do this because path + * retransmissions are only counted when we have more + * then one path. + */ + if (paths > 1 && + assocparams.sasoc_asocmaxrxt > path_sum) + return -EINVAL; + asoc->max_retrans = assocparams.sasoc_asocmaxrxt; + } + if (assocparams.sasoc_cookie_life != 0) { asoc->cookie_life.tv_sec = assocparams.sasoc_cookie_life / 1000; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index ba97f974f57c..ee236784a6bb 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -51,6 +51,8 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event, struct sctp_association *asoc); static void sctp_ulpevent_release_data(struct sctp_ulpevent *event); +static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event); + /* Initialize an ULP event from an given skb. */ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, int msg_flags) @@ -883,6 +885,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event, static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) { struct sk_buff *skb, *frag; + unsigned int len; /* Current stack structures assume that the rcv buffer is * per socket. For UDP style sockets this is not true as @@ -892,7 +895,30 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) */ skb = sctp_event2skb(event); - sctp_assoc_rwnd_increase(event->asoc, skb_headlen(skb)); + len = skb->len; + + if (!skb->data_len) + goto done; + + /* Don't forget the fragments. */ + for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) { + /* NOTE: skb_shinfos are recursive. Although IP returns + * skb's with only 1 level of fragments, SCTP reassembly can + * increase the levels. + */ + sctp_ulpevent_release_frag_data(sctp_skb2event(frag)); + } + +done: + sctp_assoc_rwnd_increase(event->asoc, len); + sctp_ulpevent_release_owner(event); +} + +static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event) +{ + struct sk_buff *skb, *frag; + + skb = sctp_event2skb(event); if (!skb->data_len) goto done; @@ -903,7 +929,7 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event) * skb's with only 1 level of fragments, SCTP reassembly can * increase the levels. */ - sctp_ulpevent_release_data(sctp_skb2event(frag)); + sctp_ulpevent_release_frag_data(sctp_skb2event(frag)); } done: diff --git a/net/socket.c b/net/socket.c index 02948b622bd2..b4848ce0d6ac 100644 --- a/net/socket.c +++ b/net/socket.c @@ -58,7 +58,6 @@ * Based upon Swansea University Computer Society NET3.039 */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/smp_lock.h> #include <linux/socket.h> @@ -335,10 +334,11 @@ static struct super_operations sockfs_ops = { .statfs = simple_statfs, }; -static struct super_block *sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int sockfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); + return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, + mnt); } static struct vfsmount *sock_mnt __read_mostly; diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 129e2bd36aff..b8714a87b34c 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -169,7 +169,7 @@ gss_import_sec_context_kerberos(const void *p, } ctx_id->internal_ctx_id = ctx; - dprintk("RPC: Succesfully imported new context.\n"); + dprintk("RPC: Successfully imported new context.\n"); return 0; out_err_free_key2: diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index f43311221a72..2f312164d6d5 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -70,7 +70,7 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(krb5_seq_lock); u32 gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index f8bac6ccd524..d88468d21c37 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -224,7 +224,8 @@ EXPORT_SYMBOL(gss_service_to_auth_domain_name); void gss_mech_put(struct gss_api_mech * gm) { - module_put(gm->gm_owner); + if (gm) + module_put(gm->gm_owner); } EXPORT_SYMBOL(gss_mech_put); @@ -307,8 +308,7 @@ gss_delete_sec_context(struct gss_ctx **context_handle) (*context_handle)->mech_type->gm_ops ->gss_delete_sec_context((*context_handle) ->internal_ctx_id); - if ((*context_handle)->mech_type) - gss_mech_put((*context_handle)->mech_type); + gss_mech_put((*context_handle)->mech_type); kfree(*context_handle); *context_handle=NULL; return GSS_S_COMPLETE; diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 5bf11ccba7cd..3d0432aa45c1 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -201,7 +201,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len, ctx_id->internal_ctx_id = ctx; - dprintk("Succesfully imported new spkm context.\n"); + dprintk("Successfully imported new spkm context.\n"); return 0; out_err_free_key2: diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d51e316c5821..94217ec9e2dd 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -425,6 +425,7 @@ static int rsc_parse(struct cache_detail *cd, struct rsc rsci, *rscp = NULL; time_t expiry; int status = -EINVAL; + struct gss_api_mech *gm = NULL; memset(&rsci, 0, sizeof(rsci)); /* context handle */ @@ -453,7 +454,6 @@ static int rsc_parse(struct cache_detail *cd, set_bit(CACHE_NEGATIVE, &rsci.h.flags); else { int N, i; - struct gss_api_mech *gm; /* gid */ if (get_int(&mesg, &rsci.cred.cr_gid)) @@ -488,21 +488,17 @@ static int rsc_parse(struct cache_detail *cd, status = -EINVAL; /* mech-specific data: */ len = qword_get(&mesg, buf, mlen); - if (len < 0) { - gss_mech_put(gm); + if (len < 0) goto out; - } status = gss_import_sec_context(buf, len, gm, &rsci.mechctx); - if (status) { - gss_mech_put(gm); + if (status) goto out; - } - gss_mech_put(gm); } rsci.h.expiry_time = expiry; rscp = rsc_update(&rsci, rscp); status = 0; out: + gss_mech_put(gm); rsc_free(&rsci); if (rscp) cache_put(&rscp->h, &rsc_cache); @@ -836,6 +832,74 @@ out: return stat; } +static inline int +total_buf_len(struct xdr_buf *buf) +{ + return buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len; +} + +static void +fix_priv_head(struct xdr_buf *buf, int pad) +{ + if (buf->page_len == 0) { + /* We need to adjust head and buf->len in tandem in this + * case to make svc_defer() work--it finds the original + * buffer start using buf->len - buf->head[0].iov_len. */ + buf->head[0].iov_len -= pad; + } +} + +static int +unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) +{ + u32 priv_len, maj_stat; + int pad, saved_len, remaining_len, offset; + + rqstp->rq_sendfile_ok = 0; + + priv_len = ntohl(svc_getu32(&buf->head[0])); + if (rqstp->rq_deferred) { + /* Already decrypted last time through! The sequence number + * check at out_seq is unnecessary but harmless: */ + goto out_seq; + } + /* buf->len is the number of bytes from the original start of the + * request to the end, where head[0].iov_len is just the bytes + * not yet read from the head, so these two values are different: */ + remaining_len = total_buf_len(buf); + if (priv_len > remaining_len) + return -EINVAL; + pad = remaining_len - priv_len; + buf->len -= pad; + fix_priv_head(buf, pad); + + /* Maybe it would be better to give gss_unwrap a length parameter: */ + saved_len = buf->len; + buf->len = priv_len; + maj_stat = gss_unwrap(ctx, 0, buf); + pad = priv_len - buf->len; + buf->len = saved_len; + buf->len -= pad; + /* The upper layers assume the buffer is aligned on 4-byte boundaries. + * In the krb5p case, at least, the data ends up offset, so we need to + * move it around. */ + /* XXX: This is very inefficient. It would be better to either do + * this while we encrypt, or maybe in the receive code, if we can peak + * ahead and work out the service and mechanism there. */ + offset = buf->head[0].iov_len % 4; + if (offset) { + buf->buflen = RPCSVC_MAXPAYLOAD; + xdr_shift_buf(buf, offset); + fix_priv_head(buf, pad); + } + if (maj_stat != GSS_S_COMPLETE) + return -EINVAL; +out_seq: + if (ntohl(svc_getu32(&buf->head[0])) != seq) + return -EINVAL; + return 0; +} + struct gss_svc_data { /* decoded gss client cred: */ struct rpc_gss_wire_cred clcred; @@ -1051,7 +1115,14 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) svc_putu32(resv, 0); break; case RPC_GSS_SVC_PRIVACY: - /* currently unsupported */ + if (unwrap_priv_data(rqstp, &rqstp->rq_arg, + gc->gc_seq, rsci->mechctx)) + goto auth_err; + /* placeholders for length and seq. number: */ + svcdata->body_start = resv->iov_base + resv->iov_len; + svc_putu32(resv, 0); + svc_putu32(resv, 0); + break; default: goto auth_err; } @@ -1076,8 +1147,8 @@ out: return ret; } -static int -svcauth_gss_release(struct svc_rqst *rqstp) +static inline int +svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) { struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; struct rpc_gss_wire_cred *gc = &gsd->clcred; @@ -1089,69 +1160,147 @@ svcauth_gss_release(struct svc_rqst *rqstp) int integ_offset, integ_len; int stat = -EINVAL; + p = gsd->body_start; + gsd->body_start = NULL; + /* move accept_stat to right place: */ + memcpy(p, p + 2, 4); + /* Don't wrap in failure case: */ + /* Counting on not getting here if call was not even accepted! */ + if (*p != rpc_success) { + resbuf->head[0].iov_len -= 2 * 4; + goto out; + } + p++; + integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; + integ_len = resbuf->len - integ_offset; + BUG_ON(integ_len % 4); + *p++ = htonl(integ_len); + *p++ = htonl(gc->gc_seq); + if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, + integ_len)) + BUG(); + if (resbuf->page_len == 0 + && resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE + < PAGE_SIZE) { + BUG_ON(resbuf->tail[0].iov_len); + /* Use head for everything */ + resv = &resbuf->head[0]; + } else if (resbuf->tail[0].iov_base == NULL) { + if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) + goto out_err; + resbuf->tail[0].iov_base = resbuf->head[0].iov_base + + resbuf->head[0].iov_len; + resbuf->tail[0].iov_len = 0; + rqstp->rq_restailpage = 0; + resv = &resbuf->tail[0]; + } else { + resv = &resbuf->tail[0]; + } + mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; + if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) + goto out_err; + svc_putu32(resv, htonl(mic.len)); + memset(mic.data + mic.len, 0, + round_up_to_quad(mic.len) - mic.len); + resv->iov_len += XDR_QUADLEN(mic.len) << 2; + /* not strictly required: */ + resbuf->len += XDR_QUADLEN(mic.len) << 2; + BUG_ON(resv->iov_len > PAGE_SIZE); +out: + stat = 0; +out_err: + return stat; +} + +static inline int +svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) +{ + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; + struct rpc_gss_wire_cred *gc = &gsd->clcred; + struct xdr_buf *resbuf = &rqstp->rq_res; + struct page **inpages = NULL; + u32 *p; + int offset, *len; + int pad; + + p = gsd->body_start; + gsd->body_start = NULL; + /* move accept_stat to right place: */ + memcpy(p, p + 2, 4); + /* Don't wrap in failure case: */ + /* Counting on not getting here if call was not even accepted! */ + if (*p != rpc_success) { + resbuf->head[0].iov_len -= 2 * 4; + return 0; + } + p++; + len = p++; + offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base; + *p++ = htonl(gc->gc_seq); + inpages = resbuf->pages; + /* XXX: Would be better to write some xdr helper functions for + * nfs{2,3,4}xdr.c that place the data right, instead of copying: */ + if (resbuf->tail[0].iov_base && rqstp->rq_restailpage == 0) { + BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base + + PAGE_SIZE); + BUG_ON(resbuf->tail[0].iov_base < resbuf->head[0].iov_base); + if (resbuf->tail[0].iov_len + resbuf->head[0].iov_len + + 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE) + return -ENOMEM; + memmove(resbuf->tail[0].iov_base + RPC_MAX_AUTH_SIZE, + resbuf->tail[0].iov_base, + resbuf->tail[0].iov_len); + resbuf->tail[0].iov_base += RPC_MAX_AUTH_SIZE; + } + if (resbuf->tail[0].iov_base == NULL) { + if (resbuf->head[0].iov_len + 2*RPC_MAX_AUTH_SIZE > PAGE_SIZE) + return -ENOMEM; + resbuf->tail[0].iov_base = resbuf->head[0].iov_base + + resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE; + resbuf->tail[0].iov_len = 0; + rqstp->rq_restailpage = 0; + } + if (gss_wrap(gsd->rsci->mechctx, offset, resbuf, inpages)) + return -ENOMEM; + *len = htonl(resbuf->len - offset); + pad = 3 - ((resbuf->len - offset - 1)&3); + p = (u32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len); + memset(p, 0, pad); + resbuf->tail[0].iov_len += pad; + resbuf->len += pad; + return 0; +} + +static int +svcauth_gss_release(struct svc_rqst *rqstp) +{ + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; + struct rpc_gss_wire_cred *gc = &gsd->clcred; + struct xdr_buf *resbuf = &rqstp->rq_res; + int stat = -EINVAL; + if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; /* Release can be called twice, but we only wrap once. */ if (gsd->body_start == NULL) goto out; /* normally not set till svc_send, but we need it here: */ - resbuf->len = resbuf->head[0].iov_len - + resbuf->page_len + resbuf->tail[0].iov_len; + /* XXX: what for? Do we mess it up the moment we call svc_putu32 + * or whatever? */ + resbuf->len = total_buf_len(resbuf); switch (gc->gc_svc) { case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: - p = gsd->body_start; - gsd->body_start = NULL; - /* move accept_stat to right place: */ - memcpy(p, p + 2, 4); - /* don't wrap in failure case: */ - /* Note: counting on not getting here if call was not even - * accepted! */ - if (*p != rpc_success) { - resbuf->head[0].iov_len -= 2 * 4; - goto out; - } - p++; - integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; - integ_len = resbuf->len - integ_offset; - BUG_ON(integ_len % 4); - *p++ = htonl(integ_len); - *p++ = htonl(gc->gc_seq); - if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, - integ_len)) - BUG(); - if (resbuf->page_len == 0 - && resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE - < PAGE_SIZE) { - BUG_ON(resbuf->tail[0].iov_len); - /* Use head for everything */ - resv = &resbuf->head[0]; - } else if (resbuf->tail[0].iov_base == NULL) { - if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE - > PAGE_SIZE) - goto out_err; - resbuf->tail[0].iov_base = - resbuf->head[0].iov_base - + resbuf->head[0].iov_len; - resbuf->tail[0].iov_len = 0; - rqstp->rq_restailpage = 0; - resv = &resbuf->tail[0]; - } else { - resv = &resbuf->tail[0]; - } - mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; - if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) + stat = svcauth_gss_wrap_resp_integ(rqstp); + if (stat) goto out_err; - svc_putu32(resv, htonl(mic.len)); - memset(mic.data + mic.len, 0, - round_up_to_quad(mic.len) - mic.len); - resv->iov_len += XDR_QUADLEN(mic.len) << 2; - /* not strictly required: */ - resbuf->len += XDR_QUADLEN(mic.len) << 2; - BUG_ON(resv->iov_len > PAGE_SIZE); break; case RPC_GSS_SVC_PRIVACY: + stat = svcauth_gss_wrap_resp_priv(rqstp); + if (stat) + goto out_err; + break; default: goto out_err; } diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index f56767aaa927..2eccffa96ba1 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -118,6 +118,8 @@ struct rpc_auth null_auth = { .au_cslack = 4, .au_rslack = 2, .au_ops = &authnull_ops, + .au_flavor = RPC_AUTH_NULL, + .au_count = ATOMIC_INIT(0), }; static diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index df14b6bfbf10..74c7406a1054 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -225,6 +225,7 @@ struct rpc_auth unix_auth = { .au_cslack = UNX_WRITESLACK, .au_rslack = 2, /* assume AUTH_NULL verf */ .au_ops = &authunix_ops, + .au_flavor = RPC_AUTH_UNIX, .au_count = ATOMIC_INIT(0), .au_credcache = &unix_cred_cache, }; diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index d25b054ec921..623180f224c9 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -6,7 +6,6 @@ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index cc673dd8433f..dc6cb93c8830 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -8,7 +8,6 @@ * Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no> * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> @@ -439,7 +438,7 @@ struct vfsmount *rpc_get_mount(void) { int err; - err = simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count); + err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count); if (err != 0) return ERR_PTR(err); return rpc_mount; @@ -516,7 +515,7 @@ rpc_depopulate(struct dentry *parent) struct dentry *dentry, *dvec[10]; int n = 0; - mutex_lock(&dir->i_mutex); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); repeat: spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { @@ -632,7 +631,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) if ((error = rpc_lookup_parent(path, nd)) != 0) return ERR_PTR(error); dir = nd->dentry->d_inode; - mutex_lock(&dir->i_mutex); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len); if (IS_ERR(dentry)) goto out_err; @@ -694,7 +693,7 @@ rpc_rmdir(char *path) if ((error = rpc_lookup_parent(path, &nd)) != 0) return error; dir = nd.dentry->d_inode; - mutex_lock(&dir->i_mutex); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); @@ -755,7 +754,7 @@ rpc_unlink(char *path) if ((error = rpc_lookup_parent(path, &nd)) != 0) return error; dir = nd.dentry->d_inode; - mutex_lock(&dir->i_mutex); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); @@ -815,11 +814,11 @@ out: return -ENOMEM; } -static struct super_block * +static int rpc_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, rpc_fill_super); + return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt); } static struct file_system_type rpc_pipe_fs_type = { diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 769114f0f886..f38f939ce95f 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -6,7 +6,6 @@ * Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b08419e1fc68..01ba60a49572 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -280,7 +280,10 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) rqstp->rq_res.page_base = 0; rqstp->rq_res.page_len = 0; rqstp->rq_res.buflen = PAGE_SIZE; + rqstp->rq_res.tail[0].iov_base = NULL; rqstp->rq_res.tail[0].iov_len = 0; + /* Will be turned off only in gss privacy case: */ + rqstp->rq_sendfile_ok = 1; /* tcp needs a space for the record length... */ if (rqstp->rq_prot == IPPROTO_TCP) svc_putu32(resv, 0); diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 1065904841fd..d89b048ad6bb 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -7,7 +7,6 @@ * impossible at the moment. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/linkage.h> #include <linux/ctype.h> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index ca4bfa57e116..6ac45103a272 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -191,7 +191,6 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, do { /* Are any pointers crossing a page boundary? */ if (pgto_base == 0) { - flush_dcache_page(*pgto); pgto_base = PAGE_CACHE_SIZE; pgto--; } @@ -211,11 +210,11 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, vto = kmap_atomic(*pgto, KM_USER0); vfrom = kmap_atomic(*pgfrom, KM_USER1); memmove(vto + pgto_base, vfrom + pgfrom_base, copy); + flush_dcache_page(*pgto); kunmap_atomic(vfrom, KM_USER1); kunmap_atomic(vto, KM_USER0); } while ((len -= copy) != 0); - flush_dcache_page(*pgto); } /* @@ -568,8 +567,7 @@ EXPORT_SYMBOL(xdr_inline_decode); * * Moves data beyond the current pointer position from the XDR head[] buffer * into the page list. Any data that lies beyond current position + "len" - * bytes is moved into the XDR tail[]. The current pointer is then - * repositioned at the beginning of the XDR tail. + * bytes is moved into the XDR tail[]. */ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) { @@ -606,6 +604,31 @@ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL(xdr_read_pages); +/** + * xdr_enter_page - decode data from the XDR page + * @xdr: pointer to xdr_stream struct + * @len: number of bytes of page data + * + * Moves data beyond the current pointer position from the XDR head[] buffer + * into the page list. Any data that lies beyond current position + "len" + * bytes is moved into the XDR tail[]. The current pointer is then + * repositioned at the beginning of the first XDR page. + */ +void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) +{ + char * kaddr = page_address(xdr->buf->pages[0]); + xdr_read_pages(xdr, len); + /* + * Position current pointer at beginning of tail, and + * set remaining message length. + */ + if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) + len = PAGE_CACHE_SIZE - xdr->buf->page_base; + xdr->p = (uint32_t *)(kaddr + xdr->buf->page_base); + xdr->end = (uint32_t *)((char *)xdr->p + len); +} +EXPORT_SYMBOL(xdr_enter_page); + static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0}; void diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4dd5b3cfe754..02060d0e7be8 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -41,7 +41,7 @@ #include <linux/types.h> #include <linux/interrupt.h> #include <linux/workqueue.h> -#include <linux/random.h> +#include <linux/net.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/metrics.h> @@ -830,7 +830,7 @@ static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) static inline void xprt_init_xid(struct rpc_xprt *xprt) { - get_random_bytes(&xprt->xid, sizeof(xprt->xid)); + xprt->xid = net_random(); } static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4b4e7dfdff14..21006b109101 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -930,6 +930,13 @@ static void xs_udp_timer(struct rpc_task *task) xprt_adjust_cwnd(task, -ETIMEDOUT); } +static unsigned short xs_get_random_port(void) +{ + unsigned short range = xprt_max_resvport - xprt_min_resvport; + unsigned short rand = (unsigned short) net_random() % range; + return rand + xprt_min_resvport; +} + /** * xs_set_port - reset the port number in the remote endpoint address * @xprt: generic transport @@ -1275,7 +1282,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) memset(xprt->slot, 0, slot_table_size); xprt->prot = IPPROTO_UDP; - xprt->port = xprt_max_resvport; + xprt->port = xs_get_random_port(); xprt->tsh_size = 0; xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ @@ -1317,7 +1324,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) memset(xprt->slot, 0, slot_table_size); xprt->prot = IPPROTO_TCP; - xprt->port = xprt_max_resvport; + xprt->port = xs_get_random_port(); xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 55538f6b60ff..cd4eafbab1b8 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -12,7 +12,6 @@ * */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/sysctl.h> @@ -37,14 +36,6 @@ struct ctl_table net_table[] = { .mode = 0555, .child = core_table, }, -#ifdef CONFIG_NET - { - .ctl_name = NET_ETHER, - .procname = "ethernet", - .mode = 0555, - .child = ether_table, - }, -#endif #ifdef CONFIG_INET { .ctl_name = NET_IPV4, diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 2c4ecbe50082..1bb75703f384 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -49,13 +49,19 @@ #include "name_table.h" #include "bcast.h" - #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ #define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ #define BCLINK_LOG_BUF_SIZE 0 +/* + * Loss rate for incoming broadcast frames; used to test retransmission code. + * Set to N to cause every N'th frame to be discarded; 0 => don't discard any. + */ + +#define TIPC_BCAST_LOSS_RATE 0 + /** * struct bcbearer_pair - a pair of bearers used by broadcast link * @primary: pointer to primary bearer @@ -75,7 +81,14 @@ struct bcbearer_pair { * @bearer: (non-standard) broadcast bearer structure * @media: (non-standard) broadcast media structure * @bpairs: array of bearer pairs - * @bpairs_temp: array of bearer pairs used during creation of "bpairs" + * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() + * @remains: temporary node map used by tipc_bcbearer_send() + * @remains_new: temporary node map used tipc_bcbearer_send() + * + * Note: The fields labelled "temporary" are incorporated into the bearer + * to avoid consuming potentially limited stack space through the use of + * large local variables within multicast routines. Concurrent access is + * prevented through use of the spinlock "bc_lock". */ struct bcbearer { @@ -83,6 +96,8 @@ struct bcbearer { struct media media; struct bcbearer_pair bpairs[MAX_BEARERS]; struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; + struct node_map remains; + struct node_map remains_new; }; /** @@ -102,7 +117,7 @@ struct bclink { static struct bcbearer *bcbearer = NULL; static struct bclink *bclink = NULL; static struct link *bcl = NULL; -static spinlock_t bc_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(bc_lock); char tipc_bclink_name[] = "multicast-link"; @@ -165,21 +180,18 @@ static int bclink_ack_allowed(u32 n) * @after: sequence number of last packet to *not* retransmit * @to: sequence number of last packet to retransmit * - * Called with 'node' locked, bc_lock unlocked + * Called with bc_lock locked */ static void bclink_retransmit_pkt(u32 after, u32 to) { struct sk_buff *buf; - spin_lock_bh(&bc_lock); buf = bcl->first_out; while (buf && less_eq(buf_seqno(buf), after)) { buf = buf->next; } - if (buf != NULL) - tipc_link_retransmit(bcl, buf, mod(to - after)); - spin_unlock_bh(&bc_lock); + tipc_link_retransmit(bcl, buf, mod(to - after)); } /** @@ -346,8 +358,10 @@ static void tipc_bclink_peek_nack(u32 dest, u32 sender_tag, u32 gap_after, u32 g for (; buf; buf = buf->next) { u32 seqno = buf_seqno(buf); - if (mod(seqno - prev) != 1) + if (mod(seqno - prev) != 1) { buf = NULL; + break; + } if (seqno == gap_after) break; prev = seqno; @@ -399,7 +413,10 @@ int tipc_bclink_send_msg(struct sk_buff *buf) */ void tipc_bclink_recv_pkt(struct sk_buff *buf) -{ +{ +#if (TIPC_BCAST_LOSS_RATE) + static int rx_count = 0; +#endif struct tipc_msg *msg = buf_msg(buf); struct node* node = tipc_node_find(msg_prevnode(msg)); u32 next_in; @@ -420,9 +437,13 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) tipc_node_lock(node); tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); + spin_lock_bh(&bc_lock); bcl->stats.recv_nacks++; + bcl->owner->next = node; /* remember requestor */ bclink_retransmit_pkt(msg_bcgap_after(msg), msg_bcgap_to(msg)); + bcl->owner->next = NULL; + spin_unlock_bh(&bc_lock); } else { tipc_bclink_peek_nack(msg_destnode(msg), msg_bcast_tag(msg), @@ -433,6 +454,14 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) return; } +#if (TIPC_BCAST_LOSS_RATE) + if (++rx_count == TIPC_BCAST_LOSS_RATE) { + rx_count = 0; + buf_discard(buf); + return; + } +#endif + tipc_node_lock(node); receive: deferred = node->bclink.deferred_head; @@ -531,12 +560,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, { static int send_count = 0; - struct node_map *remains; - struct node_map *remains_new; - struct node_map *remains_tmp; int bp_index; int swap_time; - int err; /* Prepare buffer for broadcasting (if first time trying to send it) */ @@ -557,9 +582,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, /* Send buffer over bearers until all targets reached */ - remains = kmalloc(sizeof(struct node_map), GFP_ATOMIC); - remains_new = kmalloc(sizeof(struct node_map), GFP_ATOMIC); - *remains = tipc_cltr_bcast_nodes; + bcbearer->remains = tipc_cltr_bcast_nodes; for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { struct bearer *p = bcbearer->bpairs[bp_index].primary; @@ -568,8 +591,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, if (!p) break; /* no more bearers to try */ - tipc_nmap_diff(remains, &p->nodes, remains_new); - if (remains_new->count == remains->count) + tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new); + if (bcbearer->remains_new.count == bcbearer->remains.count) continue; /* bearer pair doesn't add anything */ if (!p->publ.blocked && @@ -587,27 +610,17 @@ swap: bcbearer->bpairs[bp_index].primary = s; bcbearer->bpairs[bp_index].secondary = p; update: - if (remains_new->count == 0) { - err = TIPC_OK; - goto out; - } + if (bcbearer->remains_new.count == 0) + return TIPC_OK; - /* swap map */ - remains_tmp = remains; - remains = remains_new; - remains_new = remains_tmp; + bcbearer->remains = bcbearer->remains_new; } /* Unable to reach all targets */ bcbearer->bearer.publ.blocked = 1; bcl->stats.bearer_congs++; - err = ~TIPC_OK; - - out: - kfree(remains_new); - kfree(remains); - return err; + return ~TIPC_OK; } /** @@ -765,7 +778,7 @@ int tipc_bclink_init(void) bclink = kmalloc(sizeof(*bclink), GFP_ATOMIC); if (!bcbearer || !bclink) { nomem: - warn("Memory squeeze; Failed to create multicast link\n"); + warn("Multicast link creation failed, no memory\n"); kfree(bcbearer); bcbearer = NULL; kfree(bclink); @@ -783,7 +796,7 @@ int tipc_bclink_init(void) memset(bclink, 0, sizeof(struct bclink)); INIT_LIST_HEAD(&bcl->waiting_ports); bcl->next_out_no = 1; - bclink->node.lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&bclink->node.lock); bcl->owner = &bclink->node; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 0e3be2ab3307..b243d9d495f0 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -180,7 +180,7 @@ static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port) if (!item->next) { item->next = kmalloc(sizeof(*item), GFP_ATOMIC); if (!item->next) { - warn("Memory squeeze: multicast destination port list is incomplete\n"); + warn("Incomplete multicast delivery, no memory\n"); return; } item->next->next = NULL; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index e213a8e54855..7ef17a449cfd 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -112,39 +112,42 @@ int tipc_register_media(u32 media_type, goto exit; if (!media_name_valid(name)) { - warn("Media registration error: illegal name <%s>\n", name); + warn("Media <%s> rejected, illegal name\n", name); goto exit; } if (!bcast_addr) { - warn("Media registration error: no broadcast address supplied\n"); + warn("Media <%s> rejected, no broadcast address\n", name); goto exit; } if ((bearer_priority < TIPC_MIN_LINK_PRI) && (bearer_priority > TIPC_MAX_LINK_PRI)) { - warn("Media registration error: priority %u\n", bearer_priority); + warn("Media <%s> rejected, illegal priority (%u)\n", name, + bearer_priority); goto exit; } if ((link_tolerance < TIPC_MIN_LINK_TOL) || (link_tolerance > TIPC_MAX_LINK_TOL)) { - warn("Media registration error: tolerance %u\n", link_tolerance); + warn("Media <%s> rejected, illegal tolerance (%u)\n", name, + link_tolerance); goto exit; } media_id = media_count++; if (media_id >= MAX_MEDIA) { - warn("Attempt to register more than %u media\n", MAX_MEDIA); + warn("Media <%s> rejected, media limit reached (%u)\n", name, + MAX_MEDIA); media_count--; goto exit; } for (i = 0; i < media_id; i++) { if (media_list[i].type_id == media_type) { - warn("Attempt to register second media with type %u\n", + warn("Media <%s> rejected, duplicate type (%u)\n", name, media_type); media_count--; goto exit; } if (!strcmp(name, media_list[i].name)) { - warn("Attempt to re-register media name <%s>\n", name); + warn("Media <%s> rejected, duplicate name\n", name); media_count--; goto exit; } @@ -283,6 +286,9 @@ static struct bearer *bearer_find(const char *name) struct bearer *b_ptr; u32 i; + if (tipc_mode != TIPC_NET_MODE) + return NULL; + for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { if (b_ptr->active && (!strcmp(b_ptr->publ.name, name))) return b_ptr; @@ -475,26 +481,33 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) u32 i; int res = -EINVAL; - if (tipc_mode != TIPC_NET_MODE) + if (tipc_mode != TIPC_NET_MODE) { + warn("Bearer <%s> rejected, not supported in standalone mode\n", + name); return -ENOPROTOOPT; - - if (!bearer_name_validate(name, &b_name) || - !tipc_addr_domain_valid(bcast_scope) || - !in_scope(bcast_scope, tipc_own_addr)) + } + if (!bearer_name_validate(name, &b_name)) { + warn("Bearer <%s> rejected, illegal name\n", name); return -EINVAL; - + } + if (!tipc_addr_domain_valid(bcast_scope) || + !in_scope(bcast_scope, tipc_own_addr)) { + warn("Bearer <%s> rejected, illegal broadcast scope\n", name); + return -EINVAL; + } if ((priority < TIPC_MIN_LINK_PRI || priority > TIPC_MAX_LINK_PRI) && - (priority != TIPC_MEDIA_LINK_PRI)) + (priority != TIPC_MEDIA_LINK_PRI)) { + warn("Bearer <%s> rejected, illegal priority\n", name); return -EINVAL; + } write_lock_bh(&tipc_net_lock); - if (!tipc_bearers) - goto failed; m_ptr = media_find(b_name.media_name); if (!m_ptr) { - warn("No media <%s>\n", b_name.media_name); + warn("Bearer <%s> rejected, media <%s> not registered\n", name, + b_name.media_name); goto failed; } @@ -510,23 +523,24 @@ restart: continue; } if (!strcmp(name, tipc_bearers[i].publ.name)) { - warn("Bearer <%s> already enabled\n", name); + warn("Bearer <%s> rejected, already enabled\n", name); goto failed; } if ((tipc_bearers[i].priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { - warn("Third bearer <%s> with priority %u, unable to lower to %u\n", - name, priority + 1, priority); + warn("Bearer <%s> rejected, duplicate priority\n", + name); goto failed; } - warn("Third bearer <%s> with priority %u, lowering to %u\n", + warn("Bearer <%s> priority adjustment required %u->%u\n", name, priority + 1, priority); goto restart; } } if (bearer_id >= MAX_BEARERS) { - warn("Attempt to enable more than %d bearers\n", MAX_BEARERS); + warn("Bearer <%s> rejected, bearer limit reached (%u)\n", + name, MAX_BEARERS); goto failed; } @@ -536,7 +550,7 @@ restart: strcpy(b_ptr->publ.name, name); res = m_ptr->enable_bearer(&b_ptr->publ); if (res) { - warn("Failed to enable bearer <%s>\n", name); + warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); goto failed; } @@ -552,7 +566,7 @@ restart: b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, bcast_scope, 2); } - b_ptr->publ.lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&b_ptr->publ.lock); write_unlock_bh(&tipc_net_lock); info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, addr_string_fill(addr_string, bcast_scope), priority); @@ -573,9 +587,6 @@ int tipc_block_bearer(const char *name) struct link *l_ptr; struct link *temp_l_ptr; - if (tipc_mode != TIPC_NET_MODE) - return -ENOPROTOOPT; - read_lock_bh(&tipc_net_lock); b_ptr = bearer_find(name); if (!b_ptr) { @@ -584,6 +595,7 @@ int tipc_block_bearer(const char *name) return -EINVAL; } + info("Blocking bearer <%s>\n", name); spin_lock_bh(&b_ptr->publ.lock); b_ptr->publ.blocked = 1; list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { @@ -595,7 +607,6 @@ int tipc_block_bearer(const char *name) } spin_unlock_bh(&b_ptr->publ.lock); read_unlock_bh(&tipc_net_lock); - info("Blocked bearer <%s>\n", name); return TIPC_OK; } @@ -611,15 +622,13 @@ static int bearer_disable(const char *name) struct link *l_ptr; struct link *temp_l_ptr; - if (tipc_mode != TIPC_NET_MODE) - return -ENOPROTOOPT; - b_ptr = bearer_find(name); if (!b_ptr) { warn("Attempt to disable unknown bearer <%s>\n", name); return -EINVAL; } + info("Disabling bearer <%s>\n", name); tipc_disc_stop_link_req(b_ptr->link_req); spin_lock_bh(&b_ptr->publ.lock); b_ptr->link_req = NULL; @@ -635,7 +644,6 @@ static int bearer_disable(const char *name) tipc_link_delete(l_ptr); } spin_unlock_bh(&b_ptr->publ.lock); - info("Disabled bearer <%s>\n", name); memset(b_ptr, 0, sizeof(struct bearer)); return TIPC_OK; } diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c index 1aed81584e96..1dcb6940e338 100644 --- a/net/tipc/cluster.c +++ b/net/tipc/cluster.c @@ -60,8 +60,10 @@ struct cluster *tipc_cltr_create(u32 addr) int alloc; c_ptr = (struct cluster *)kmalloc(sizeof(*c_ptr), GFP_ATOMIC); - if (c_ptr == NULL) + if (c_ptr == NULL) { + warn("Cluster creation failure, no memory\n"); return NULL; + } memset(c_ptr, 0, sizeof(*c_ptr)); c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0); @@ -70,30 +72,32 @@ struct cluster *tipc_cltr_create(u32 addr) else max_nodes = tipc_max_nodes + 1; alloc = sizeof(void *) * (max_nodes + 1); + c_ptr->nodes = (struct node **)kmalloc(alloc, GFP_ATOMIC); if (c_ptr->nodes == NULL) { + warn("Cluster creation failure, no memory for node area\n"); kfree(c_ptr); return NULL; } - memset(c_ptr->nodes, 0, alloc); + memset(c_ptr->nodes, 0, alloc); + if (in_own_cluster(addr)) tipc_local_nodes = c_ptr->nodes; c_ptr->highest_slave = LOWEST_SLAVE - 1; c_ptr->highest_node = 0; z_ptr = tipc_zone_find(tipc_zone(addr)); - if (z_ptr == NULL) { + if (!z_ptr) { z_ptr = tipc_zone_create(addr); } - if (z_ptr != NULL) { - tipc_zone_attach_cluster(z_ptr, c_ptr); - c_ptr->owner = z_ptr; - } - else { + if (!z_ptr) { + kfree(c_ptr->nodes); kfree(c_ptr); - c_ptr = NULL; + return NULL; } + tipc_zone_attach_cluster(z_ptr, c_ptr); + c_ptr->owner = z_ptr; return c_ptr; } diff --git a/net/tipc/config.c b/net/tipc/config.c index 48b5de2dbe60..285e1bc2d880 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -63,7 +63,7 @@ struct manager { static struct manager mng = { 0}; -static spinlock_t config_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(config_lock); static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ @@ -291,13 +291,22 @@ static struct sk_buff *cfg_set_own_addr(void) if (!tipc_addr_node_valid(addr)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (node address)"); - if (tipc_own_addr) + if (tipc_mode == TIPC_NET_MODE) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); + tipc_own_addr = addr; + + /* + * Must release all spinlocks before calling start_net() because + * Linux version of TIPC calls eth_media_start() which calls + * register_netdevice_notifier() which may block! + * + * Temporarily releasing the lock should be harmless for non-Linux TIPC, + * but Linux version of eth_media_start() should really be reworked + * so that it can be called with spinlocks held. + */ spin_unlock_bh(&config_lock); - tipc_core_stop_net(); - tipc_own_addr = addr; tipc_core_start_net(); spin_lock_bh(&config_lock); return tipc_cfg_reply_none(); @@ -350,50 +359,21 @@ static struct sk_buff *cfg_set_max_subscriptions(void) static struct sk_buff *cfg_set_max_ports(void) { - int orig_mode; u32 value; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = *(u32 *)TLV_DATA(req_tlv_area); value = ntohl(value); + if (value == tipc_max_ports) + return tipc_cfg_reply_none(); if (value != delimit(value, 127, 65535)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max ports must be 127-65535)"); - - if (value == tipc_max_ports) - return tipc_cfg_reply_none(); - - if (atomic_read(&tipc_user_count) > 2) + if (tipc_mode != TIPC_NOT_RUNNING) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max ports while TIPC users exist)"); - - spin_unlock_bh(&config_lock); - orig_mode = tipc_get_mode(); - if (orig_mode == TIPC_NET_MODE) - tipc_core_stop_net(); - tipc_core_stop(); + " (cannot change max ports while TIPC is active)"); tipc_max_ports = value; - tipc_core_start(); - if (orig_mode == TIPC_NET_MODE) - tipc_core_start_net(); - spin_lock_bh(&config_lock); - return tipc_cfg_reply_none(); -} - -static struct sk_buff *set_net_max(int value, int *parameter) -{ - int orig_mode; - - if (value != *parameter) { - orig_mode = tipc_get_mode(); - if (orig_mode == TIPC_NET_MODE) - tipc_core_stop_net(); - *parameter = value; - if (orig_mode == TIPC_NET_MODE) - tipc_core_start_net(); - } - return tipc_cfg_reply_none(); } @@ -405,10 +385,16 @@ static struct sk_buff *cfg_set_max_zones(void) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = *(u32 *)TLV_DATA(req_tlv_area); value = ntohl(value); + if (value == tipc_max_zones) + return tipc_cfg_reply_none(); if (value != delimit(value, 1, 255)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max zones must be 1-255)"); - return set_net_max(value, &tipc_max_zones); + if (tipc_mode == TIPC_NET_MODE) + return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + " (cannot change max zones once TIPC has joined a network)"); + tipc_max_zones = value; + return tipc_cfg_reply_none(); } static struct sk_buff *cfg_set_max_clusters(void) @@ -419,8 +405,8 @@ static struct sk_buff *cfg_set_max_clusters(void) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = *(u32 *)TLV_DATA(req_tlv_area); value = ntohl(value); - if (value != 1) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + if (value != delimit(value, 1, 1)) + return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max clusters fixed at 1)"); return tipc_cfg_reply_none(); } @@ -433,10 +419,16 @@ static struct sk_buff *cfg_set_max_nodes(void) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = *(u32 *)TLV_DATA(req_tlv_area); value = ntohl(value); + if (value == tipc_max_nodes) + return tipc_cfg_reply_none(); if (value != delimit(value, 8, 2047)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (max nodes must be 8-2047)"); - return set_net_max(value, &tipc_max_nodes); + if (tipc_mode == TIPC_NET_MODE) + return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + " (cannot change max nodes once TIPC has joined a network)"); + tipc_max_nodes = value; + return tipc_cfg_reply_none(); } static struct sk_buff *cfg_set_max_slaves(void) @@ -461,15 +453,16 @@ static struct sk_buff *cfg_set_netid(void) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); value = *(u32 *)TLV_DATA(req_tlv_area); value = ntohl(value); + if (value == tipc_net_id) + return tipc_cfg_reply_none(); if (value != delimit(value, 1, 9999)) return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE " (network id must be 1-9999)"); - - if (tipc_own_addr) + if (tipc_mode == TIPC_NET_MODE) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change network id once part of network)"); - - return set_net_max(value, &tipc_net_id); + " (cannot change network id once TIPC has joined a network)"); + tipc_net_id = value; + return tipc_cfg_reply_none(); } struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area, @@ -649,7 +642,7 @@ static void cfg_named_msg_event(void *userdata, if ((size < sizeof(*req_hdr)) || (size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) { - warn("discarded invalid configuration message\n"); + warn("Invalid configuration message discarded\n"); return; } diff --git a/net/tipc/core.c b/net/tipc/core.c index 3d0a8ee4e1d3..0539a8362858 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -2,7 +2,7 @@ * net/tipc/core.c: TIPC module code * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,7 +57,7 @@ void tipc_socket_stop(void); int tipc_netlink_start(void); void tipc_netlink_stop(void); -#define MOD_NAME "tipc_start: " +#define TIPC_MOD_VER "1.6.1" #ifndef CONFIG_TIPC_ZONES #define CONFIG_TIPC_ZONES 3 @@ -191,14 +191,15 @@ static int __init tipc_init(void) int res; tipc_log_reinit(CONFIG_TIPC_LOG); - info("Activated (compiled " __DATE__ " " __TIME__ ")\n"); + info("Activated (version " TIPC_MOD_VER + " compiled " __DATE__ " " __TIME__ ")\n"); tipc_own_addr = 0; tipc_remote_management = 1; tipc_max_publications = 10000; tipc_max_subscriptions = 2000; tipc_max_ports = delimit(CONFIG_TIPC_PORTS, 127, 65536); - tipc_max_zones = delimit(CONFIG_TIPC_ZONES, 1, 511); + tipc_max_zones = delimit(CONFIG_TIPC_ZONES, 1, 255); tipc_max_clusters = delimit(CONFIG_TIPC_CLUSTERS, 1, 1); tipc_max_nodes = delimit(CONFIG_TIPC_NODES, 8, 2047); tipc_max_slaves = delimit(CONFIG_TIPC_SLAVE_NODES, 0, 2047); @@ -224,6 +225,7 @@ module_exit(tipc_exit); MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication"); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(TIPC_MOD_VER); /* Native TIPC API for kernel-space applications (see tipc.h) */ diff --git a/net/tipc/core.h b/net/tipc/core.h index 1f2e8b27a13f..762aac2572be 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -2,7 +2,7 @@ * net/tipc/core.h: Include file for TIPC global declarations * * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -111,10 +111,6 @@ void tipc_dump(struct print_buf*,const char *fmt, ...); #else -#ifndef DBG_OUTPUT -#define DBG_OUTPUT NULL -#endif - /* * TIPC debug support not included: * - system messages are printed to system console @@ -129,6 +125,19 @@ void tipc_dump(struct print_buf*,const char *fmt, ...); #define msg_dbg(msg,txt) do {} while (0) #define dump(fmt,arg...) do {} while (0) + +/* + * TIPC_OUTPUT is defined to be the system console, while DBG_OUTPUT is + * the null print buffer. Thes ensures that any system or debug messages + * that are generated without using the above macros are handled correctly. + */ + +#undef TIPC_OUTPUT +#define TIPC_OUTPUT TIPC_CONS + +#undef DBG_OUTPUT +#define DBG_OUTPUT NULL + #endif @@ -288,7 +297,10 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb) * buf_acquire - creates a TIPC message buffer * @size: message size (including TIPC header) * - * Returns a new buffer. Space is reserved for a data link header. + * Returns a new buffer with data pointers set to the specified size. + * + * NOTE: Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. */ static inline struct sk_buff *buf_acquire(u32 size) @@ -309,7 +321,7 @@ static inline struct sk_buff *buf_acquire(u32 size) * buf_discard - frees a TIPC message buffer * @skb: message buffer * - * Frees a new buffer. If passed NULL, just returns. + * Frees a message buffer. If passed NULL, just returns. */ static inline void buf_discard(struct sk_buff *skb) diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c index 26ef95d5fe38..55130655e1ed 100644 --- a/net/tipc/dbg.c +++ b/net/tipc/dbg.c @@ -41,7 +41,7 @@ #define MAX_STRING 512 static char print_string[MAX_STRING]; -static spinlock_t print_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(print_lock); static struct print_buf cons_buf = { NULL, 0, NULL, NULL }; struct print_buf *TIPC_CONS = &cons_buf; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 92601385e5f5..2b8441203120 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -2,7 +2,7 @@ * net/tipc/discover.c * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -176,7 +176,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf) n_ptr = tipc_node_create(orig); } if (n_ptr == NULL) { - warn("Memory squeeze; Failed to create node\n"); return; } spin_lock_bh(&n_ptr->lock); @@ -191,10 +190,8 @@ void tipc_disc_recv_msg(struct sk_buff *buf) } addr = &link->media_addr; if (memcmp(addr, &media_addr, sizeof(*addr))) { - char addr_string[16]; - - warn("New bearer address for %s\n", - addr_string_fill(addr_string, orig)); + warn("Resetting link <%s>, peer interface address changed\n", + link->name); memcpy(addr, &media_addr, sizeof(*addr)); tipc_link_reset(link); } @@ -270,8 +267,8 @@ static void disc_timeout(struct link_req *req) /* leave timer interval "as is" if already at a "normal" rate */ } else { req->timer_intv *= 2; - if (req->timer_intv > TIPC_LINK_REQ_SLOW) - req->timer_intv = TIPC_LINK_REQ_SLOW; + if (req->timer_intv > TIPC_LINK_REQ_FAST) + req->timer_intv = TIPC_LINK_REQ_FAST; if ((req->timer_intv == TIPC_LINK_REQ_FAST) && (req->bearer->nodes.count)) req->timer_intv = TIPC_LINK_REQ_SLOW; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 7a252785f727..682da4a28041 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -2,7 +2,7 @@ * net/tipc/eth_media.c: Ethernet bearer support for TIPC * * Copyright (c) 2001-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -98,17 +98,19 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, u32 size; if (likely(eb_ptr->bearer)) { - size = msg_size((struct tipc_msg *)buf->data); - skb_trim(buf, size); - if (likely(buf->len == size)) { - buf->next = NULL; - tipc_recv_msg(buf, eb_ptr->bearer); - } else { - kfree_skb(buf); + if (likely(!dev->promiscuity) || + !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) || + !memcmp(buf->mac.raw,dev->broadcast,ETH_ALEN)) { + size = msg_size((struct tipc_msg *)buf->data); + skb_trim(buf, size); + if (likely(buf->len == size)) { + buf->next = NULL; + tipc_recv_msg(buf, eb_ptr->bearer); + return TIPC_OK; + } } - } else { - kfree_skb(buf); } + kfree_skb(buf); return TIPC_OK; } @@ -125,8 +127,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) /* Find device with specified name */ - while (dev && dev->name && - (memcmp(dev->name, driver_name, strlen(dev->name)))) { + while (dev && dev->name && strncmp(dev->name, driver_name, IFNAMSIZ)) { dev = dev->next; } if (!dev) @@ -252,7 +253,9 @@ int tipc_eth_media_start(void) if (eth_started) return -EINVAL; - memset(&bcast_addr, 0xff, sizeof(bcast_addr)); + bcast_addr.type = htonl(TIPC_MEDIA_TYPE_ETH); + memset(&bcast_addr.dev_addr, 0xff, ETH_ALEN); + memset(eth_bearers, 0, sizeof(eth_bearers)); res = tipc_register_media(TIPC_MEDIA_TYPE_ETH, "eth", diff --git a/net/tipc/handler.c b/net/tipc/handler.c index 966f70a1b608..ae6ddf00a1aa 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -44,7 +44,7 @@ struct queue_item { static kmem_cache_t *tipc_queue_item_cache; static struct list_head signal_queue_head; -static spinlock_t qitem_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(qitem_lock); static int handler_enabled = 0; static void process_signal_queue(unsigned long dummy); diff --git a/net/tipc/link.c b/net/tipc/link.c index 784b24b6d102..c10e18a49b96 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2,7 +2,7 @@ * net/tipc/link.c: TIPC link code * * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2004-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -419,7 +419,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, l_ptr = (struct link *)kmalloc(sizeof(*l_ptr), GFP_ATOMIC); if (!l_ptr) { - warn("Memory squeeze; Failed to create link\n"); + warn("Link creation failed, no memory\n"); return NULL; } memset(l_ptr, 0, sizeof(*l_ptr)); @@ -469,7 +469,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, if (!pb) { kfree(l_ptr); - warn("Memory squeeze; Failed to create link\n"); + warn("Link creation failed, no memory for print buffer\n"); return NULL; } tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE); @@ -574,7 +574,6 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all) break; list_del_init(&p_ptr->wait_list); p_ptr->congested_link = NULL; - assert(p_ptr->wakeup); spin_lock_bh(p_ptr->publ.lock); p_ptr->publ.congested = 0; p_ptr->wakeup(&p_ptr->publ); @@ -691,6 +690,7 @@ void tipc_link_reset(struct link *l_ptr) struct sk_buff *buf; u32 prev_state = l_ptr->state; u32 checkpoint = l_ptr->next_in_no; + int was_active_link = tipc_link_is_active(l_ptr); msg_set_session(l_ptr->pmsg, msg_session(l_ptr->pmsg) + 1); @@ -712,7 +712,7 @@ void tipc_link_reset(struct link *l_ptr) tipc_printf(TIPC_CONS, "\nReset link <%s>\n", l_ptr->name); dbg_link_dump(); #endif - if (tipc_node_has_active_links(l_ptr->owner) && + if (was_active_link && tipc_node_has_active_links(l_ptr->owner) && l_ptr->owner->permit_changeover) { l_ptr->reset_checkpoint = checkpoint; l_ptr->exp_msg_count = START_CHANGEOVER; @@ -755,7 +755,7 @@ void tipc_link_reset(struct link *l_ptr) static void link_activate(struct link *l_ptr) { - l_ptr->next_in_no = 1; + l_ptr->next_in_no = l_ptr->stats.recv_info = 1; tipc_node_link_up(l_ptr->owner, l_ptr); tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); link_send_event(tipc_cfg_link_event, l_ptr, 1); @@ -820,6 +820,8 @@ static void link_state_event(struct link *l_ptr, unsigned event) break; case RESET_MSG: dbg_link("RES -> RR\n"); + info("Resetting link <%s>, requested by peer\n", + l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -844,6 +846,8 @@ static void link_state_event(struct link *l_ptr, unsigned event) break; case RESET_MSG: dbg_link("RES -> RR\n"); + info("Resetting link <%s>, requested by peer " + "while probing\n", l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; @@ -875,6 +879,8 @@ static void link_state_event(struct link *l_ptr, unsigned event) } else { /* Link has failed */ dbg_link("-> RU (%u probes unanswered)\n", l_ptr->fsm_msg_cnt); + warn("Resetting link <%s>, peer not responding\n", + l_ptr->name); tipc_link_reset(l_ptr); l_ptr->state = RESET_UNKNOWN; l_ptr->fsm_msg_cnt = 0; @@ -982,17 +988,20 @@ static int link_bundle_buf(struct link *l_ptr, struct tipc_msg *bundler_msg = buf_msg(bundler); struct tipc_msg *msg = buf_msg(buf); u32 size = msg_size(msg); - u32 to_pos = align(msg_size(bundler_msg)); - u32 rest = link_max_pkt(l_ptr) - to_pos; + u32 bundle_size = msg_size(bundler_msg); + u32 to_pos = align(bundle_size); + u32 pad = to_pos - bundle_size; if (msg_user(bundler_msg) != MSG_BUNDLER) return 0; if (msg_type(bundler_msg) != OPEN_MSG) return 0; - if (rest < align(size)) + if (skb_tailroom(bundler) < (pad + size)) + return 0; + if (link_max_pkt(l_ptr) < (to_pos + size)) return 0; - skb_put(bundler, (to_pos - msg_size(bundler_msg)) + size); + skb_put(bundler, pad + size); memcpy(bundler->data + to_pos, buf->data, size); msg_set_size(bundler_msg, to_pos + size); msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1); @@ -1050,7 +1059,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) msg_dbg(msg, "TIPC: Congestion, throwing away\n"); buf_discard(buf); if (imp > CONN_MANAGER) { - warn("Resetting <%s>, send queue full", l_ptr->name); + warn("Resetting link <%s>, send queue full", l_ptr->name); tipc_link_reset(l_ptr); } return dsz; @@ -1135,9 +1144,13 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) if (n_ptr) { tipc_node_lock(n_ptr); l_ptr = n_ptr->active_links[selector & 1]; - dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest); if (l_ptr) { + dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest); res = tipc_link_send_buf(l_ptr, buf); + } else { + dbg("Attempt to send msg to unreachable node:\n"); + msg_dbg(buf_msg(buf),">>>"); + buf_discard(buf); } tipc_node_unlock(n_ptr); } else { @@ -1242,8 +1255,6 @@ int tipc_link_send_sections_fast(struct port *sender, int res; u32 selector = msg_origport(hdr) & 1; - assert(destaddr != tipc_own_addr); - again: /* * Try building message using port's max_pkt hint. @@ -1604,40 +1615,121 @@ void tipc_link_push_queue(struct link *l_ptr) tipc_bearer_schedule(l_ptr->b_ptr, l_ptr); } +static void link_reset_all(unsigned long addr) +{ + struct node *n_ptr; + char addr_string[16]; + u32 i; + + read_lock_bh(&tipc_net_lock); + n_ptr = tipc_node_find((u32)addr); + if (!n_ptr) { + read_unlock_bh(&tipc_net_lock); + return; /* node no longer exists */ + } + + tipc_node_lock(n_ptr); + + warn("Resetting all links to %s\n", + addr_string_fill(addr_string, n_ptr->addr)); + + for (i = 0; i < MAX_BEARERS; i++) { + if (n_ptr->links[i]) { + link_print(n_ptr->links[i], TIPC_OUTPUT, + "Resetting link\n"); + tipc_link_reset(n_ptr->links[i]); + } + } + + tipc_node_unlock(n_ptr); + read_unlock_bh(&tipc_net_lock); +} + +static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + + warn("Retransmission failure on link <%s>\n", l_ptr->name); + tipc_msg_print(TIPC_OUTPUT, msg, ">RETR-FAIL>"); + + if (l_ptr->addr) { + + /* Handle failure on standard link */ + + link_print(l_ptr, TIPC_OUTPUT, "Resetting link\n"); + tipc_link_reset(l_ptr); + + } else { + + /* Handle failure on broadcast link */ + + struct node *n_ptr; + char addr_string[16]; + + tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg)); + tipc_printf(TIPC_OUTPUT, "Outstanding acks: %u\n", (u32)TIPC_SKB_CB(buf)->handle); + + n_ptr = l_ptr->owner->next; + tipc_node_lock(n_ptr); + + addr_string_fill(addr_string, n_ptr->addr); + tipc_printf(TIPC_OUTPUT, "Multicast link info for %s\n", addr_string); + tipc_printf(TIPC_OUTPUT, "Supported: %d, ", n_ptr->bclink.supported); + tipc_printf(TIPC_OUTPUT, "Acked: %u\n", n_ptr->bclink.acked); + tipc_printf(TIPC_OUTPUT, "Last in: %u, ", n_ptr->bclink.last_in); + tipc_printf(TIPC_OUTPUT, "Gap after: %u, ", n_ptr->bclink.gap_after); + tipc_printf(TIPC_OUTPUT, "Gap to: %u\n", n_ptr->bclink.gap_to); + tipc_printf(TIPC_OUTPUT, "Nack sync: %u\n\n", n_ptr->bclink.nack_sync); + + tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr); + + tipc_node_unlock(n_ptr); + + l_ptr->stale_count = 0; + } +} + void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, u32 retransmits) { struct tipc_msg *msg; + if (!buf) + return; + + msg = buf_msg(buf); + dbg("Retransmitting %u in link %x\n", retransmits, l_ptr); - if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr) && buf && !skb_cloned(buf)) { - msg_dbg(buf_msg(buf), ">NO_RETR->BCONG>"); - dbg_print_link(l_ptr, " "); - l_ptr->retransm_queue_head = msg_seqno(buf_msg(buf)); - l_ptr->retransm_queue_size = retransmits; - return; + if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { + if (!skb_cloned(buf)) { + msg_dbg(msg, ">NO_RETR->BCONG>"); + dbg_print_link(l_ptr, " "); + l_ptr->retransm_queue_head = msg_seqno(msg); + l_ptr->retransm_queue_size = retransmits; + return; + } else { + /* Don't retransmit if driver already has the buffer */ + } + } else { + /* Detect repeated retransmit failures on uncongested bearer */ + + if (l_ptr->last_retransmitted == msg_seqno(msg)) { + if (++l_ptr->stale_count > 100) { + link_retransmit_failure(l_ptr, buf); + return; + } + } else { + l_ptr->last_retransmitted = msg_seqno(msg); + l_ptr->stale_count = 1; + } } + while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) { msg = buf_msg(buf); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { - /* Catch if retransmissions fail repeatedly: */ - if (l_ptr->last_retransmitted == msg_seqno(msg)) { - if (++l_ptr->stale_count > 100) { - tipc_msg_print(TIPC_CONS, buf_msg(buf), ">RETR>"); - info("...Retransmitted %u times\n", - l_ptr->stale_count); - link_print(l_ptr, TIPC_CONS, "Resetting Link\n"); - tipc_link_reset(l_ptr); - break; - } - } else { - l_ptr->stale_count = 0; - } - l_ptr->last_retransmitted = msg_seqno(msg); - msg_dbg(buf_msg(buf), ">RETR>"); buf = buf->next; retransmits--; @@ -1650,6 +1742,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, return; } } + l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0; } @@ -1720,6 +1813,11 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) link_recv_non_seq(buf); continue; } + + if (unlikely(!msg_short(msg) && + (msg_destnode(msg) != tipc_own_addr))) + goto cont; + n_ptr = tipc_node_find(msg_prevnode(msg)); if (unlikely(!n_ptr)) goto cont; @@ -2140,7 +2238,7 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) if (msg_linkprio(msg) && (msg_linkprio(msg) != l_ptr->priority)) { - warn("Changing prio <%s>: %u->%u\n", + warn("Resetting link <%s>, priority change %u->%u\n", l_ptr->name, l_ptr->priority, msg_linkprio(msg)); l_ptr->priority = msg_linkprio(msg); tipc_link_reset(l_ptr); /* Enforce change to take effect */ @@ -2209,17 +2307,22 @@ void tipc_link_tunnel(struct link *l_ptr, u32 length = msg_size(msg); tunnel = l_ptr->owner->active_links[selector & 1]; - if (!tipc_link_is_up(tunnel)) + if (!tipc_link_is_up(tunnel)) { + warn("Link changeover error, " + "tunnel link no longer available\n"); return; + } msg_set_size(tunnel_hdr, length + INT_H_SIZE); buf = buf_acquire(length + INT_H_SIZE); - if (!buf) + if (!buf) { + warn("Link changeover error, " + "unable to send tunnel msg\n"); return; + } memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE); memcpy(buf->data + INT_H_SIZE, (unchar *)msg, length); dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane); msg_dbg(buf_msg(buf), ">SEND>"); - assert(tunnel); tipc_link_send_buf(tunnel, buf); } @@ -2235,23 +2338,27 @@ void tipc_link_changeover(struct link *l_ptr) u32 msgcount = l_ptr->out_queue_size; struct sk_buff *crs = l_ptr->first_out; struct link *tunnel = l_ptr->owner->active_links[0]; - int split_bundles = tipc_node_has_redundant_links(l_ptr->owner); struct tipc_msg tunnel_hdr; + int split_bundles; if (!tunnel) return; - if (!l_ptr->owner->permit_changeover) + if (!l_ptr->owner->permit_changeover) { + warn("Link changeover error, " + "peer did not permit changeover\n"); return; + } msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); + dbg("Link changeover requires %u tunnel messages\n", msgcount); + if (!l_ptr->first_out) { struct sk_buff *buf; - assert(!msgcount); buf = buf_acquire(INT_H_SIZE); if (buf) { memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE); @@ -2261,10 +2368,15 @@ void tipc_link_changeover(struct link *l_ptr) msg_dbg(&tunnel_hdr, "EMPTY>SEND>"); tipc_link_send_buf(tunnel, buf); } else { - warn("Memory squeeze; link changeover failed\n"); + warn("Link changeover error, " + "unable to send changeover msg\n"); } return; } + + split_bundles = (l_ptr->owner->active_links[0] != + l_ptr->owner->active_links[1]); + while (crs) { struct tipc_msg *msg = buf_msg(crs); @@ -2310,7 +2422,8 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel) msg_set_size(&tunnel_hdr, length + INT_H_SIZE); outbuf = buf_acquire(length + INT_H_SIZE); if (outbuf == NULL) { - warn("Memory squeeze; buffer duplication failed\n"); + warn("Link changeover error, " + "unable to send duplicate msg\n"); return; } memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE); @@ -2364,11 +2477,15 @@ static int link_recv_changeover_msg(struct link **l_ptr, u32 msg_count = msg_msgcnt(tunnel_msg); dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)]; - assert(dest_link != *l_ptr); if (!dest_link) { msg_dbg(tunnel_msg, "NOLINK/<REC<"); goto exit; } + if (dest_link == *l_ptr) { + err("Unexpected changeover message on link <%s>\n", + (*l_ptr)->name); + goto exit; + } dbg("%c<-%c:", dest_link->b_ptr->net_plane, (*l_ptr)->b_ptr->net_plane); *l_ptr = dest_link; @@ -2381,7 +2498,7 @@ static int link_recv_changeover_msg(struct link **l_ptr, } *buf = buf_extract(tunnel_buf,INT_H_SIZE); if (*buf == NULL) { - warn("Memory squeeze; failed to extract msg\n"); + warn("Link changeover error, duplicate msg dropped\n"); goto exit; } msg_dbg(tunnel_msg, "TNL<REC<"); @@ -2393,13 +2510,17 @@ static int link_recv_changeover_msg(struct link **l_ptr, if (tipc_link_is_up(dest_link)) { msg_dbg(tunnel_msg, "UP/FIRST/<REC<"); + info("Resetting link <%s>, changeover initiated by peer\n", + dest_link->name); tipc_link_reset(dest_link); dest_link->exp_msg_count = msg_count; + dbg("Expecting %u tunnelled messages\n", msg_count); if (!msg_count) goto exit; } else if (dest_link->exp_msg_count == START_CHANGEOVER) { msg_dbg(tunnel_msg, "BLK/FIRST/<REC<"); dest_link->exp_msg_count = msg_count; + dbg("Expecting %u tunnelled messages\n", msg_count); if (!msg_count) goto exit; } @@ -2407,6 +2528,8 @@ static int link_recv_changeover_msg(struct link **l_ptr, /* Receive original message */ if (dest_link->exp_msg_count == 0) { + warn("Link switchover error, " + "got too many tunnelled messages\n"); msg_dbg(tunnel_msg, "OVERDUE/DROP/<REC<"); dbg_print_link(dest_link, "LINK:"); goto exit; @@ -2422,7 +2545,7 @@ static int link_recv_changeover_msg(struct link **l_ptr, buf_discard(tunnel_buf); return 1; } else { - warn("Memory squeeze; dropped incoming msg\n"); + warn("Link changeover error, original msg dropped\n"); } } exit: @@ -2444,13 +2567,8 @@ void tipc_link_recv_bundle(struct sk_buff *buf) while (msgcount--) { obuf = buf_extract(buf, pos); if (obuf == NULL) { - char addr_string[16]; - - warn("Buffer allocation failure;\n"); - warn(" incoming message(s) from %s lost\n", - addr_string_fill(addr_string, - msg_orignode(buf_msg(buf)))); - return; + warn("Link unable to unbundle message(s)\n"); + break; }; pos += align(msg_size(buf_msg(obuf))); msg_dbg(buf_msg(obuf), " /"); @@ -2508,7 +2626,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) } fragm = buf_acquire(fragm_sz + INT_H_SIZE); if (fragm == NULL) { - warn("Memory squeeze; failed to fragment msg\n"); + warn("Link unable to fragment message\n"); dsz = -ENOMEM; goto exit; } @@ -2623,7 +2741,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, set_fragm_size(pbuf,fragm_sz); set_expected_frags(pbuf,exp_fragm_cnt - 1); } else { - warn("Memory squeeze; got no defragmenting buffer\n"); + warn("Link unable to reassemble fragmented message\n"); } buf_discard(fbuf); return 0; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index a3bbc891f959..f0b063bcc2a9 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -127,7 +127,7 @@ void tipc_named_publish(struct publication *publ) buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0); if (!buf) { - warn("Memory squeeze; failed to distribute publication\n"); + warn("Publication distribution failure\n"); return; } @@ -151,7 +151,7 @@ void tipc_named_withdraw(struct publication *publ) buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { - warn("Memory squeeze; failed to distribute withdrawal\n"); + warn("Withdrawl distribution failure\n"); return; } @@ -174,7 +174,6 @@ void tipc_named_node_up(unsigned long node) u32 rest; u32 max_item_buf; - assert(in_own_cluster(node)); read_lock_bh(&tipc_nametbl_lock); max_item_buf = TIPC_MAX_USER_MSG_SIZE / ITEM_SIZE; max_item_buf *= ITEM_SIZE; @@ -185,8 +184,8 @@ void tipc_named_node_up(unsigned long node) left = (rest <= max_item_buf) ? rest : max_item_buf; rest -= left; buf = named_prepare_buf(PUBLICATION, left, node); - if (buf == NULL) { - warn("Memory Squeeze; could not send publication\n"); + if (!buf) { + warn("Bulk publication distribution failure\n"); goto exit; } item = (struct distr_item *)msg_data(buf_msg(buf)); @@ -221,15 +220,24 @@ exit: static void node_is_down(struct publication *publ) { struct publication *p; + write_lock_bh(&tipc_nametbl_lock); dbg("node_is_down: withdrawing %u, %u, %u\n", publ->type, publ->lower, publ->upper); publ->key += 1222345; p = tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); - assert(p == publ); write_unlock_bh(&tipc_nametbl_lock); - kfree(publ); + + if (p != publ) { + err("Unable to remove publication from failed node\n" + "(type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n", + publ->type, publ->lower, publ->node, publ->ref, publ->key); + } + + if (p) { + kfree(p); + } } /** @@ -275,9 +283,15 @@ void tipc_named_recv(struct sk_buff *buf) if (publ) { tipc_nodesub_unsubscribe(&publ->subscr); kfree(publ); + } else { + err("Unable to remove publication by node 0x%x\n" + "(type=%u, lower=%u, ref=%u, key=%u)\n", + msg_orignode(msg), + ntohl(item->type), ntohl(item->lower), + ntohl(item->ref), ntohl(item->key)); } } else { - warn("tipc_named_recv: unknown msg\n"); + warn("Unrecognized name table message received\n"); } item++; } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index d129422fc5c2..a6926ff07bcc 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -71,7 +71,7 @@ struct sub_seq { * @sseq: pointer to dynamically-sized array of sub-sequences of this 'type'; * sub-sequences are sorted in ascending order * @alloc: number of sub-sequences currently in array - * @first_free: upper bound of highest sub-sequence + 1 + * @first_free: array index of first unused sub-sequence entry * @ns_list: links to adjacent name sequences in hash chain * @subscriptions: list of subscriptions for this 'type' * @lock: spinlock controlling access to name sequence structure @@ -101,7 +101,7 @@ struct name_table { static struct name_table table = { NULL } ; static atomic_t rsv_publ_ok = ATOMIC_INIT(0); -rwlock_t tipc_nametbl_lock = RW_LOCK_UNLOCKED; +DEFINE_RWLOCK(tipc_nametbl_lock); static int hash(int x) @@ -120,7 +120,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper, struct publication *publ = (struct publication *)kmalloc(sizeof(*publ), GFP_ATOMIC); if (publ == NULL) { - warn("Memory squeeze; failed to create publication\n"); + warn("Publication creation failure, no memory\n"); return NULL; } @@ -165,17 +165,17 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_hea struct sub_seq *sseq = tipc_subseq_alloc(1); if (!nseq || !sseq) { - warn("Memory squeeze; failed to create name sequence\n"); + warn("Name sequence creation failed, no memory\n"); kfree(nseq); kfree(sseq); return NULL; } memset(nseq, 0, sizeof(*nseq)); - nseq->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&nseq->lock); nseq->type = type; nseq->sseqs = sseq; - dbg("tipc_nameseq_create() nseq = %x type %u, ssseqs %x, ff: %u\n", + dbg("tipc_nameseq_create(): nseq = %p, type %u, ssseqs %p, ff: %u\n", nseq, type, nseq->sseqs, nseq->first_free); nseq->alloc = 1; INIT_HLIST_NODE(&nseq->ns_list); @@ -253,16 +253,16 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, struct sub_seq *sseq; int created_subseq = 0; - assert(nseq->first_free <= nseq->alloc); sseq = nameseq_find_subseq(nseq, lower); - dbg("nameseq_ins: for seq %x,<%u,%u>, found sseq %x\n", + dbg("nameseq_ins: for seq %p, {%u,%u}, found sseq %p\n", nseq, type, lower, sseq); if (sseq) { /* Lower end overlaps existing entry => need an exact match */ if ((sseq->lower != lower) || (sseq->upper != upper)) { - warn("Overlapping publ <%u,%u,%u>\n", type, lower, upper); + warn("Cannot publish {%u,%u,%u}, overlap error\n", + type, lower, upper); return NULL; } } else { @@ -277,25 +277,27 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, if ((inspos < nseq->first_free) && (upper >= nseq->sseqs[inspos].lower)) { - warn("Overlapping publ <%u,%u,%u>\n", type, lower, upper); + warn("Cannot publish {%u,%u,%u}, overlap error\n", + type, lower, upper); return NULL; } /* Ensure there is space for new sub-sequence */ if (nseq->first_free == nseq->alloc) { - struct sub_seq *sseqs = nseq->sseqs; - nseq->sseqs = tipc_subseq_alloc(nseq->alloc * 2); - if (nseq->sseqs != NULL) { - memcpy(nseq->sseqs, sseqs, - nseq->alloc * sizeof (struct sub_seq)); - kfree(sseqs); - dbg("Allocated %u sseqs\n", nseq->alloc); - nseq->alloc *= 2; - } else { - warn("Memory squeeze; failed to create sub-sequence\n"); + struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2); + + if (!sseqs) { + warn("Cannot publish {%u,%u,%u}, no memory\n", + type, lower, upper); return NULL; } + dbg("Allocated %u more sseqs\n", nseq->alloc); + memcpy(sseqs, nseq->sseqs, + nseq->alloc * sizeof(struct sub_seq)); + kfree(nseq->sseqs); + nseq->sseqs = sseqs; + nseq->alloc *= 2; } dbg("Have %u sseqs for type %u\n", nseq->alloc, type); @@ -311,7 +313,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, sseq->upper = upper; created_subseq = 1; } - dbg("inserting (%u %u %u) from %x:%u into sseq %x(%u,%u) of seq %x\n", + dbg("inserting {%u,%u,%u} from <0x%x:%u> into sseq %p(%u,%u) of seq %p\n", type, lower, upper, node, port, sseq, sseq->lower, sseq->upper, nseq); @@ -320,7 +322,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, publ = publ_create(type, lower, upper, scope, node, port, key); if (!publ) return NULL; - dbg("inserting publ %x, node=%x publ->node=%x, subscr->node=%x\n", + dbg("inserting publ %p, node=0x%x publ->node=0x%x, subscr->node=%p\n", publ, node, publ->node, publ->subscr.node); if (!sseq->zone_list) @@ -367,45 +369,47 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, /** * tipc_nameseq_remove_publ - + * + * NOTE: There may be cases where TIPC is asked to remove a publication + * that is not in the name table. For example, if another node issues a + * publication for a name sequence that overlaps an existing name sequence + * the publication will not be recorded, which means the publication won't + * be found when the name sequence is later withdrawn by that node. + * A failed withdraw request simply returns a failure indication and lets the + * caller issue any error or warning messages associated with such a problem. */ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 inst, u32 node, u32 ref, u32 key) { struct publication *publ; + struct publication *curr; struct publication *prev; struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); struct sub_seq *free; struct subscription *s, *st; int removed_subseq = 0; - assert(nseq); - - if (!sseq) { - int i; - - warn("Withdraw unknown <%u,%u>?\n", nseq->type, inst); - assert(nseq->sseqs); - dbg("Dumping subseqs %x for %x, alloc = %u,ff=%u\n", - nseq->sseqs, nseq, nseq->alloc, - nseq->first_free); - for (i = 0; i < nseq->first_free; i++) { - dbg("Subseq %u(%x): lower = %u,upper = %u\n", - i, &nseq->sseqs[i], nseq->sseqs[i].lower, - nseq->sseqs[i].upper); - } + if (!sseq) return NULL; - } - dbg("nameseq_remove: seq: %x, sseq %x, <%u,%u> key %u\n", + + dbg("tipc_nameseq_remove_publ: seq: %p, sseq %p, {%u,%u}, key %u\n", nseq, sseq, nseq->type, inst, key); + /* Remove publication from zone scope list */ + prev = sseq->zone_list; publ = sseq->zone_list->zone_list_next; while ((publ->key != key) || (publ->ref != ref) || (publ->node && (publ->node != node))) { prev = publ; publ = publ->zone_list_next; - assert(prev != sseq->zone_list); + if (prev == sseq->zone_list) { + + /* Prevent endless loop if publication not found */ + + return NULL; + } } if (publ != sseq->zone_list) prev->zone_list_next = publ->zone_list_next; @@ -416,14 +420,24 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i sseq->zone_list = NULL; } + /* Remove publication from cluster scope list, if present */ + if (in_own_cluster(node)) { prev = sseq->cluster_list; - publ = sseq->cluster_list->cluster_list_next; - while ((publ->key != key) || (publ->ref != ref) || - (publ->node && (publ->node != node))) { - prev = publ; - publ = publ->cluster_list_next; - assert(prev != sseq->cluster_list); + curr = sseq->cluster_list->cluster_list_next; + while (curr != publ) { + prev = curr; + curr = curr->cluster_list_next; + if (prev == sseq->cluster_list) { + + /* Prevent endless loop for malformed list */ + + err("Unable to de-list cluster publication\n" + "{%u%u}, node=0x%x, ref=%u, key=%u)\n", + publ->type, publ->lower, publ->node, + publ->ref, publ->key); + goto end_cluster; + } } if (publ != sseq->cluster_list) prev->cluster_list_next = publ->cluster_list_next; @@ -434,15 +448,26 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i sseq->cluster_list = NULL; } } +end_cluster: + + /* Remove publication from node scope list, if present */ if (node == tipc_own_addr) { prev = sseq->node_list; - publ = sseq->node_list->node_list_next; - while ((publ->key != key) || (publ->ref != ref) || - (publ->node && (publ->node != node))) { - prev = publ; - publ = publ->node_list_next; - assert(prev != sseq->node_list); + curr = sseq->node_list->node_list_next; + while (curr != publ) { + prev = curr; + curr = curr->node_list_next; + if (prev == sseq->node_list) { + + /* Prevent endless loop for malformed list */ + + err("Unable to de-list node publication\n" + "{%u%u}, node=0x%x, ref=%u, key=%u)\n", + publ->type, publ->lower, publ->node, + publ->ref, publ->key); + goto end_node; + } } if (publ != sseq->node_list) prev->node_list_next = publ->node_list_next; @@ -453,22 +478,18 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i sseq->node_list = NULL; } } - assert(!publ->node || (publ->node == node)); - assert(publ->ref == ref); - assert(publ->key == key); +end_node: - /* - * Contract subseq list if no more publications: - */ - if (!sseq->node_list && !sseq->cluster_list && !sseq->zone_list) { + /* Contract subseq list if no more publications for that subseq */ + + if (!sseq->zone_list) { free = &nseq->sseqs[nseq->first_free--]; memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof (*sseq)); removed_subseq = 1; } - /* - * Any subscriptions waiting ? - */ + /* Notify any waiting subscriptions */ + list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_subscr_report_overlap(s, publ->lower, @@ -478,6 +499,7 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i publ->node, removed_subseq); } + return publ; } @@ -530,7 +552,7 @@ static struct name_seq *nametbl_find_seq(u32 type) seq_head = &table.types[hash(type)]; hlist_for_each_entry(ns, seq_node, seq_head, ns_list) { if (ns->type == type) { - dbg("found %x\n", ns); + dbg("found %p\n", ns); return ns; } } @@ -543,22 +565,21 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, { struct name_seq *seq = nametbl_find_seq(type); - dbg("ins_publ: <%u,%x,%x> found %x\n", type, lower, upper, seq); + dbg("tipc_nametbl_insert_publ: {%u,%u,%u} found %p\n", type, lower, upper, seq); if (lower > upper) { - warn("Failed to publish illegal <%u,%u,%u>\n", + warn("Failed to publish illegal {%u,%u,%u}\n", type, lower, upper); return NULL; } - dbg("Publishing <%u,%u,%u> from %x\n", type, lower, upper, node); + dbg("Publishing {%u,%u,%u} from 0x%x\n", type, lower, upper, node); if (!seq) { seq = tipc_nameseq_create(type, &table.types[hash(type)]); - dbg("tipc_nametbl_insert_publ: created %x\n", seq); + dbg("tipc_nametbl_insert_publ: created %p\n", seq); } if (!seq) return NULL; - assert(seq->type == type); return tipc_nameseq_insert_publ(seq, type, lower, upper, scope, node, port, key); } @@ -572,7 +593,7 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, if (!seq) return NULL; - dbg("Withdrawing <%u,%u> from %x\n", type, lower, node); + dbg("Withdrawing {%u,%u} from 0x%x\n", type, lower, node); publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key); if (!seq->first_free && list_empty(&seq->subscriptions)) { @@ -738,12 +759,12 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, struct publication *publ; if (table.local_publ_count >= tipc_max_publications) { - warn("Failed publish: max %u local publication\n", + warn("Publication failed, local publication limit reached (%u)\n", tipc_max_publications); return NULL; } if ((type < TIPC_RESERVED_TYPES) && !atomic_read(&rsv_publ_ok)) { - warn("Failed to publish reserved name <%u,%u,%u>\n", + warn("Publication failed, reserved name {%u,%u,%u}\n", type, lower, upper); return NULL; } @@ -767,10 +788,10 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) { struct publication *publ; - dbg("tipc_nametbl_withdraw:<%d,%d,%d>\n", type, lower, key); + dbg("tipc_nametbl_withdraw: {%u,%u}, key=%u\n", type, lower, key); write_lock_bh(&tipc_nametbl_lock); publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); - if (publ) { + if (likely(publ)) { table.local_publ_count--; if (publ->scope != TIPC_NODE_SCOPE) tipc_named_withdraw(publ); @@ -780,6 +801,9 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) return 1; } write_unlock_bh(&tipc_nametbl_lock); + err("Unable to remove local publication\n" + "(type=%u, lower=%u, ref=%u, key=%u)\n", + type, lower, ref, key); return 0; } @@ -787,8 +811,7 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) * tipc_nametbl_subscribe - add a subscription object to the name table */ -void -tipc_nametbl_subscribe(struct subscription *s) +void tipc_nametbl_subscribe(struct subscription *s) { u32 type = s->seq.type; struct name_seq *seq; @@ -800,11 +823,13 @@ tipc_nametbl_subscribe(struct subscription *s) } if (seq){ spin_lock_bh(&seq->lock); - dbg("tipc_nametbl_subscribe:found %x for <%u,%u,%u>\n", + dbg("tipc_nametbl_subscribe:found %p for {%u,%u,%u}\n", seq, type, s->seq.lower, s->seq.upper); - assert(seq->type == type); tipc_nameseq_subscribe(seq, s); spin_unlock_bh(&seq->lock); + } else { + warn("Failed to create subscription for {%u,%u,%u}\n", + s->seq.type, s->seq.lower, s->seq.upper); } write_unlock_bh(&tipc_nametbl_lock); } @@ -813,8 +838,7 @@ tipc_nametbl_subscribe(struct subscription *s) * tipc_nametbl_unsubscribe - remove a subscription object from name table */ -void -tipc_nametbl_unsubscribe(struct subscription *s) +void tipc_nametbl_unsubscribe(struct subscription *s) { struct name_seq *seq; @@ -1049,35 +1073,20 @@ int tipc_nametbl_init(void) void tipc_nametbl_stop(void) { - struct hlist_head *seq_head; - struct hlist_node *seq_node; - struct hlist_node *tmp; - struct name_seq *seq; u32 i; if (!table.types) return; + /* Verify name table is empty, then release it */ + write_lock_bh(&tipc_nametbl_lock); for (i = 0; i < tipc_nametbl_size; i++) { - seq_head = &table.types[i]; - hlist_for_each_entry_safe(seq, seq_node, tmp, seq_head, ns_list) { - struct sub_seq *sseq = seq->sseqs; - - for (; sseq != &seq->sseqs[seq->first_free]; sseq++) { - struct publication *publ = sseq->zone_list; - assert(publ); - do { - struct publication *next = - publ->zone_list_next; - kfree(publ); - publ = next; - } - while (publ != sseq->zone_list); - } - } + if (!hlist_empty(&table.types[i])) + err("tipc_nametbl_stop(): hash chain %u is non-null\n", i); } kfree(table.types); table.types = NULL; write_unlock_bh(&tipc_nametbl_lock); } + diff --git a/net/tipc/net.c b/net/tipc/net.c index f7c8223ddf7d..e5a359ab4930 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -115,7 +115,7 @@ * - A local spin_lock protecting the queue of subscriber events. */ -rwlock_t tipc_net_lock = RW_LOCK_UNLOCKED; +DEFINE_RWLOCK(tipc_net_lock); struct network tipc_net = { NULL }; struct node *tipc_net_select_remote_node(u32 addr, u32 ref) diff --git a/net/tipc/node.c b/net/tipc/node.c index 0d5db06e203f..fc6d09630ccd 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2,7 +2,7 @@ * net/tipc/node.c: TIPC node management routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,34 +61,37 @@ struct node *tipc_node_create(u32 addr) struct node **curr_node; n_ptr = kmalloc(sizeof(*n_ptr),GFP_ATOMIC); - if (n_ptr != NULL) { - memset(n_ptr, 0, sizeof(*n_ptr)); - n_ptr->addr = addr; - n_ptr->lock = SPIN_LOCK_UNLOCKED; - INIT_LIST_HEAD(&n_ptr->nsub); - - c_ptr = tipc_cltr_find(addr); - if (c_ptr == NULL) - c_ptr = tipc_cltr_create(addr); - if (c_ptr != NULL) { - n_ptr->owner = c_ptr; - tipc_cltr_attach_node(c_ptr, n_ptr); - n_ptr->last_router = -1; - - /* Insert node into ordered list */ - for (curr_node = &tipc_nodes; *curr_node; - curr_node = &(*curr_node)->next) { - if (addr < (*curr_node)->addr) { - n_ptr->next = *curr_node; - break; - } - } - (*curr_node) = n_ptr; - } else { - kfree(n_ptr); - n_ptr = NULL; - } - } + if (!n_ptr) { + warn("Node creation failed, no memory\n"); + return NULL; + } + + c_ptr = tipc_cltr_find(addr); + if (!c_ptr) { + c_ptr = tipc_cltr_create(addr); + } + if (!c_ptr) { + kfree(n_ptr); + return NULL; + } + + memset(n_ptr, 0, sizeof(*n_ptr)); + n_ptr->addr = addr; + spin_lock_init(&n_ptr->lock); + INIT_LIST_HEAD(&n_ptr->nsub); + n_ptr->owner = c_ptr; + tipc_cltr_attach_node(c_ptr, n_ptr); + n_ptr->last_router = -1; + + /* Insert node into ordered list */ + for (curr_node = &tipc_nodes; *curr_node; + curr_node = &(*curr_node)->next) { + if (addr < (*curr_node)->addr) { + n_ptr->next = *curr_node; + break; + } + } + (*curr_node) = n_ptr; return n_ptr; } @@ -122,6 +125,8 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr) { struct link **active = &n_ptr->active_links[0]; + n_ptr->working_links++; + info("Established link <%s> on network plane %c\n", l_ptr->name, l_ptr->b_ptr->net_plane); @@ -132,7 +137,7 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr) return; } if (l_ptr->priority < active[0]->priority) { - info("Link is standby\n"); + info("New link <%s> becomes standby\n", l_ptr->name); return; } tipc_link_send_duplicate(active[0], l_ptr); @@ -140,8 +145,9 @@ void tipc_node_link_up(struct node *n_ptr, struct link *l_ptr) active[0] = l_ptr; return; } - info("Link <%s> on network plane %c becomes standby\n", - active[0]->name, active[0]->b_ptr->net_plane); + info("Old link <%s> becomes standby\n", active[0]->name); + if (active[1] != active[0]) + info("Old link <%s> becomes standby\n", active[1]->name); active[0] = active[1] = l_ptr; } @@ -181,6 +187,8 @@ void tipc_node_link_down(struct node *n_ptr, struct link *l_ptr) { struct link **active; + n_ptr->working_links--; + if (!tipc_link_is_active(l_ptr)) { info("Lost standby link <%s> on network plane %c\n", l_ptr->name, l_ptr->b_ptr->net_plane); @@ -210,8 +218,7 @@ int tipc_node_has_active_links(struct node *n_ptr) int tipc_node_has_redundant_links(struct node *n_ptr) { - return (tipc_node_has_active_links(n_ptr) && - (n_ptr->active_links[0] != n_ptr->active_links[1])); + return (n_ptr->working_links > 1); } static int tipc_node_has_active_routes(struct node *n_ptr) @@ -234,7 +241,6 @@ struct node *tipc_node_attach_link(struct link *l_ptr) u32 bearer_id = l_ptr->b_ptr->identity; char addr_string[16]; - assert(bearer_id < MAX_BEARERS); if (n_ptr->link_cnt >= 2) { char addr_string[16]; @@ -249,7 +255,7 @@ struct node *tipc_node_attach_link(struct link *l_ptr) n_ptr->link_cnt++; return n_ptr; } - err("Attempt to establish second link on <%s> to <%s> \n", + err("Attempt to establish second link on <%s> to %s \n", l_ptr->b_ptr->publ.name, addr_string_fill(addr_string, l_ptr->addr)); } @@ -314,7 +320,7 @@ static void node_established_contact(struct node *n_ptr) struct cluster *c_ptr; dbg("node_established_contact:-> %x\n", n_ptr->addr); - if (!tipc_node_has_active_routes(n_ptr)) { + if (!tipc_node_has_active_routes(n_ptr) && in_own_cluster(n_ptr->addr)) { tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr); } @@ -586,6 +592,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) struct sk_buff *buf; struct node *n_ptr; struct tipc_node_info node_info; + u32 payload_size; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -602,8 +609,11 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* For now, get space for all other nodes (will need to modify this when slave nodes are supported */ - buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(node_info)) * - (tipc_max_nodes - 1)); + payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1); + if (payload_size > 32768u) + return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + " (too many nodes)"); + buf = tipc_cfg_reply_alloc(payload_size); if (!buf) return NULL; @@ -627,6 +637,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) struct sk_buff *buf; struct node *n_ptr; struct tipc_link_info link_info; + u32 payload_size; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -639,12 +650,15 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) if (!tipc_nodes) return tipc_cfg_reply_none(); - - /* For now, get space for 2 links to all other nodes + bcast link - (will need to modify this when slave nodes are supported */ - - buf = tipc_cfg_reply_alloc(TLV_SPACE(sizeof(link_info)) * - (2 * (tipc_max_nodes - 1) + 1)); + + /* Get space for all unicast links + multicast link */ + + payload_size = TLV_SPACE(sizeof(link_info)) * + (tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1); + if (payload_size > 32768u) + return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED + " (too many links)"); + buf = tipc_cfg_reply_alloc(payload_size); if (!buf) return NULL; diff --git a/net/tipc/node.h b/net/tipc/node.h index 781126e084ae..a07cc79ea637 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -51,6 +51,7 @@ * @nsub: list of "node down" subscriptions monitoring node * @active_links: pointers to active links to node * @links: pointers to all links to node + * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node * @permit_changeover: non-zero if node has redundant links to this system * @routers: bitmap (used for multicluster communication) @@ -76,6 +77,7 @@ struct node { struct link *active_links[2]; struct link *links[MAX_BEARERS]; int link_cnt; + int working_links; int permit_changeover; u32 routers[512/32]; int last_router; diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index cff4068cc755..cc3fff3dec4f 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -47,18 +47,19 @@ void tipc_nodesub_subscribe(struct node_subscr *node_sub, u32 addr, void *usr_handle, net_ev_handler handle_down) { - node_sub->node = NULL; - if (addr == tipc_own_addr) + if (addr == tipc_own_addr) { + node_sub->node = NULL; return; - if (!tipc_addr_node_valid(addr)) { - warn("node_subscr with illegal %x\n", addr); + } + + node_sub->node = tipc_node_find(addr); + if (!node_sub->node) { + warn("Node subscription rejected, unknown node 0x%x\n", addr); return; } - node_sub->handle_node_down = handle_down; node_sub->usr_handle = usr_handle; - node_sub->node = tipc_node_find(addr); - assert(node_sub->node); + tipc_node_lock(node_sub->node); list_add_tail(&node_sub->nodesub_list, &node_sub->node->nsub); tipc_node_unlock(node_sub->node); diff --git a/net/tipc/port.c b/net/tipc/port.c index 67e96cb1e825..3251c8d8e53c 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -57,8 +57,8 @@ static struct sk_buff *msg_queue_head = NULL; static struct sk_buff *msg_queue_tail = NULL; -spinlock_t tipc_port_list_lock = SPIN_LOCK_UNLOCKED; -static spinlock_t queue_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(tipc_port_list_lock); +static DEFINE_SPINLOCK(queue_lock); static LIST_HEAD(ports); static void port_handle_node_down(unsigned long ref); @@ -168,7 +168,6 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp) struct port_list *item = dp; int cnt = 0; - assert(buf); msg = buf_msg(buf); /* Create destination port list, if one wasn't supplied */ @@ -196,7 +195,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp) struct sk_buff *b = skb_clone(buf, GFP_ATOMIC); if (b == NULL) { - warn("Buffer allocation failure\n"); + warn("Unable to deliver multicast message(s)\n"); msg_dbg(msg, "LOST:"); goto exit; } @@ -228,14 +227,14 @@ u32 tipc_createport_raw(void *usr_handle, u32 ref; p_ptr = kmalloc(sizeof(*p_ptr), GFP_ATOMIC); - if (p_ptr == NULL) { - warn("Memory squeeze; failed to create port\n"); + if (!p_ptr) { + warn("Port creation failed, no memory\n"); return 0; } memset(p_ptr, 0, sizeof(*p_ptr)); ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock); if (!ref) { - warn("Reference Table Exhausted\n"); + warn("Port creation failed, reference table exhausted\n"); kfree(p_ptr); return 0; } @@ -810,18 +809,20 @@ static void port_dispatcher_sigh(void *dummy) void *usr_handle; int connected; int published; + u32 message_type; struct sk_buff *next = buf->next; struct tipc_msg *msg = buf_msg(buf); u32 dref = msg_destport(msg); + message_type = msg_type(msg); + if (message_type > TIPC_DIRECT_MSG) + goto reject; /* Unsupported message type */ + p_ptr = tipc_port_lock(dref); - if (!p_ptr) { - /* Port deleted while msg in queue */ - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); - buf = next; - continue; - } + if (!p_ptr) + goto reject; /* Port deleted while msg in queue */ + orig.ref = msg_origport(msg); orig.node = msg_orignode(msg); up_ptr = p_ptr->user_port; @@ -832,7 +833,7 @@ static void port_dispatcher_sigh(void *dummy) if (unlikely(msg_errcode(msg))) goto err; - switch (msg_type(msg)) { + switch (message_type) { case TIPC_CONN_MSG:{ tipc_conn_msg_event cb = up_ptr->conn_msg_cb; @@ -874,6 +875,7 @@ static void port_dispatcher_sigh(void *dummy) &orig); break; } + case TIPC_MCAST_MSG: case TIPC_NAMED_MSG:{ tipc_named_msg_event cb = up_ptr->named_msg_cb; @@ -886,7 +888,8 @@ static void port_dispatcher_sigh(void *dummy) goto reject; dseq.type = msg_nametype(msg); dseq.lower = msg_nameinst(msg); - dseq.upper = dseq.lower; + dseq.upper = (message_type == TIPC_NAMED_MSG) + ? dseq.lower : msg_nameupper(msg); skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), msg_data_sz(msg), msg_importance(msg), @@ -899,7 +902,7 @@ static void port_dispatcher_sigh(void *dummy) buf = next; continue; err: - switch (msg_type(msg)) { + switch (message_type) { case TIPC_CONN_MSG:{ tipc_conn_shutdown_event cb = @@ -931,6 +934,7 @@ err: msg_data_sz(msg), msg_errcode(msg), &orig); break; } + case TIPC_MCAST_MSG: case TIPC_NAMED_MSG:{ tipc_named_msg_err_event cb = up_ptr->named_err_cb; @@ -940,7 +944,8 @@ err: break; dseq.type = msg_nametype(msg); dseq.lower = msg_nameinst(msg); - dseq.upper = dseq.lower; + dseq.upper = (message_type == TIPC_NAMED_MSG) + ? dseq.lower : msg_nameupper(msg); skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), msg_data_sz(msg), msg_errcode(msg), &dseq); @@ -1054,7 +1059,8 @@ int tipc_createport(u32 user_ref, u32 ref; up_ptr = (struct user_port *)kmalloc(sizeof(*up_ptr), GFP_ATOMIC); - if (up_ptr == NULL) { + if (!up_ptr) { + warn("Port creation failed, no memory\n"); return -ENOMEM; } ref = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, importance); @@ -1165,8 +1171,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - if (!p_ptr->publ.published) - goto exit; if (!seq) { list_for_each_entry_safe(publ, tpubl, &p_ptr->publications, pport_list) { @@ -1193,7 +1197,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) } if (list_empty(&p_ptr->publications)) p_ptr->publ.published = 0; -exit: tipc_port_unlock(p_ptr); return res; } diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 33bbf5095094..596d3c8ff750 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -63,7 +63,7 @@ struct ref_table tipc_ref_table = { NULL }; -static rwlock_t ref_table_lock = RW_LOCK_UNLOCKED; +static DEFINE_RWLOCK(ref_table_lock); /** * tipc_ref_table_init - create reference table for objects @@ -87,7 +87,7 @@ int tipc_ref_table_init(u32 requested_size, u32 start) index_mask = sz - 1; for (i = sz - 1; i >= 0; i--) { table[i].object = NULL; - table[i].lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&table[i].lock); table[i].data.next_plus_upper = (start & ~index_mask) + i - 1; } tipc_ref_table.entries = table; @@ -127,7 +127,14 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock) u32 next_plus_upper; u32 reference = 0; - assert(tipc_ref_table.entries && object); + if (!object) { + err("Attempt to acquire reference to non-existent object\n"); + return 0; + } + if (!tipc_ref_table.entries) { + err("Reference table not found during acquisition attempt\n"); + return 0; + } write_lock_bh(&ref_table_lock); if (tipc_ref_table.first_free) { @@ -162,15 +169,28 @@ void tipc_ref_discard(u32 ref) u32 index; u32 index_mask; - assert(tipc_ref_table.entries); - assert(ref != 0); + if (!ref) { + err("Attempt to discard reference 0\n"); + return; + } + if (!tipc_ref_table.entries) { + err("Reference table not found during discard attempt\n"); + return; + } write_lock_bh(&ref_table_lock); index_mask = tipc_ref_table.index_mask; index = ref & index_mask; entry = &(tipc_ref_table.entries[index]); - assert(entry->object != 0); - assert(entry->data.reference == ref); + + if (!entry->object) { + err("Attempt to discard reference to non-existent object\n"); + goto exit; + } + if (entry->data.reference != ref) { + err("Attempt to discard non-existent reference\n"); + goto exit; + } /* mark entry as unused */ entry->object = NULL; @@ -184,6 +204,7 @@ void tipc_ref_discard(u32 ref) /* increment upper bits of entry to invalidate subsequent references */ entry->data.next_plus_upper = (ref & ~index_mask) + (index_mask + 1); +exit: write_unlock_bh(&ref_table_lock); } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 648a734e6044..32d778448a00 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -169,12 +169,6 @@ static int tipc_create(struct socket *sock, int protocol) struct sock *sk; u32 ref; - if ((sock->type != SOCK_STREAM) && - (sock->type != SOCK_SEQPACKET) && - (sock->type != SOCK_DGRAM) && - (sock->type != SOCK_RDM)) - return -EPROTOTYPE; - if (unlikely(protocol != 0)) return -EPROTONOSUPPORT; @@ -199,6 +193,9 @@ static int tipc_create(struct socket *sock, int protocol) sock->ops = &msg_ops; sock->state = SS_READY; break; + default: + tipc_deleteport(ref); + return -EPROTOTYPE; } sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1); @@ -426,7 +423,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr))) return -EFAULT; - if ((ntohs(hdr.tcm_type) & 0xC000) & (!capable(CAP_NET_ADMIN))) + if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN))) return -EACCES; return 0; @@ -437,7 +434,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) * @iocb: (unused) * @sock: socket structure * @m: message to send - * @total_len: (unused) + * @total_len: length of message * * Message must have an destination specified explicitly. * Used for SOCK_RDM and SOCK_DGRAM messages, @@ -458,7 +455,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, if (unlikely(!dest)) return -EDESTADDRREQ; - if (unlikely(dest->family != AF_TIPC)) + if (unlikely((m->msg_namelen < sizeof(*dest)) || + (dest->family != AF_TIPC))) return -EINVAL; needs_conn = (sock->state != SS_READY); @@ -470,6 +468,10 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, if ((tsock->p->published) || ((sock->type == SOCK_STREAM) && (total_len != 0))) return -EOPNOTSUPP; + if (dest->addrtype == TIPC_ADDR_NAME) { + tsock->p->conn_type = dest->addr.name.name.type; + tsock->p->conn_instance = dest->addr.name.name.instance; + } } if (down_interruptible(&tsock->sem)) @@ -538,7 +540,7 @@ exit: * @iocb: (unused) * @sock: socket structure * @m: message to send - * @total_len: (unused) + * @total_len: length of message * * Used for SOCK_SEQPACKET messages and SOCK_STREAM data. * @@ -561,15 +563,15 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, return -ERESTARTSYS; } - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) - res = -EPIPE; - else - res = -ENOTCONN; - goto exit; - } - do { + if (unlikely(sock->state != SS_CONNECTED)) { + if (sock->state == SS_DISCONNECTING) + res = -EPIPE; + else + res = -ENOTCONN; + goto exit; + } + res = tipc_send(tsock->p->ref, m->msg_iovlen, m->msg_iov); if (likely(res != -ELINKCONG)) { exit: @@ -597,7 +599,8 @@ exit: * * Used for SOCK_STREAM data. * - * Returns the number of bytes sent on success, or errno otherwise + * Returns the number of bytes sent on success (or partial success), + * or errno if no data sent */ @@ -611,6 +614,7 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, char __user *curr_start; int curr_left; int bytes_to_send; + int bytes_sent; int res; if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE)) @@ -633,11 +637,11 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, * of small iovec entries into send_packet(). */ - my_msg = *m; - curr_iov = my_msg.msg_iov; - curr_iovlen = my_msg.msg_iovlen; + curr_iov = m->msg_iov; + curr_iovlen = m->msg_iovlen; my_msg.msg_iov = &my_iov; my_msg.msg_iovlen = 1; + bytes_sent = 0; while (curr_iovlen--) { curr_start = curr_iov->iov_base; @@ -648,16 +652,18 @@ static int send_stream(struct kiocb *iocb, struct socket *sock, ? curr_left : TIPC_MAX_USER_MSG_SIZE; my_iov.iov_base = curr_start; my_iov.iov_len = bytes_to_send; - if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) - return res; + if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) { + return bytes_sent ? bytes_sent : res; + } curr_left -= bytes_to_send; curr_start += bytes_to_send; + bytes_sent += bytes_to_send; } curr_iov++; } - return total_len; + return bytes_sent; } /** @@ -727,6 +733,7 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, u32 anc_data[3]; u32 err; u32 dest_type; + int has_name; int res; if (likely(m->msg_controllen == 0)) @@ -738,10 +745,10 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, if (unlikely(err)) { anc_data[0] = err; anc_data[1] = msg_data_sz(msg); - if ((res = put_cmsg(m, SOL_SOCKET, TIPC_ERRINFO, 8, anc_data))) + if ((res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data))) return res; if (anc_data[1] && - (res = put_cmsg(m, SOL_SOCKET, TIPC_RETDATA, anc_data[1], + (res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1], msg_data(msg)))) return res; } @@ -751,25 +758,28 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG; switch (dest_type) { case TIPC_NAMED_MSG: + has_name = 1; anc_data[0] = msg_nametype(msg); anc_data[1] = msg_namelower(msg); anc_data[2] = msg_namelower(msg); break; case TIPC_MCAST_MSG: + has_name = 1; anc_data[0] = msg_nametype(msg); anc_data[1] = msg_namelower(msg); anc_data[2] = msg_nameupper(msg); break; case TIPC_CONN_MSG: + has_name = (tport->conn_type != 0); anc_data[0] = tport->conn_type; anc_data[1] = tport->conn_instance; anc_data[2] = tport->conn_instance; break; default: - anc_data[0] = 0; + has_name = 0; } - if (anc_data[0] && - (res = put_cmsg(m, SOL_SOCKET, TIPC_DESTNAME, 12, anc_data))) + if (has_name && + (res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data))) return res; return 0; @@ -960,7 +970,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, restart: if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) && (flags & MSG_DONTWAIT))) { - res = (sz_copied == 0) ? -EWOULDBLOCK : 0; + res = -EWOULDBLOCK; goto exit; } @@ -1051,7 +1061,7 @@ restart: exit: up(&tsock->sem); - return res ? res : sz_copied; + return sz_copied ? sz_copied : res; } /** @@ -1236,7 +1246,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, if (sock->state == SS_READY) return -EOPNOTSUPP; - /* MOVE THE REST OF THIS ERROR CHECKING TO send_msg()? */ + /* Issue Posix-compliant error code if socket is in the wrong state */ + if (sock->state == SS_LISTENING) return -EOPNOTSUPP; if (sock->state == SS_CONNECTING) @@ -1244,13 +1255,20 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, if (sock->state != SS_UNCONNECTED) return -EISCONN; - if ((dst->family != AF_TIPC) || - ((dst->addrtype != TIPC_ADDR_NAME) && (dst->addrtype != TIPC_ADDR_ID))) + /* + * Reject connection attempt using multicast address + * + * Note: send_msg() validates the rest of the address fields, + * so there's no need to do it here + */ + + if (dst->addrtype == TIPC_ADDR_MCAST) return -EINVAL; /* Send a 'SYN-' to destination */ m.msg_name = dest; + m.msg_namelen = destlen; if ((res = send_msg(NULL, sock, &m, 0)) < 0) { sock->state = SS_DISCONNECTING; return res; @@ -1269,10 +1287,6 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, msg = buf_msg(buf); res = auto_connect(sock, tsock, msg); if (!res) { - if (dst->addrtype == TIPC_ADDR_NAME) { - tsock->p->conn_type = dst->addr.name.name.type; - tsock->p->conn_instance = dst->addr.name.name.instance; - } if (!msg_data_sz(msg)) advance_queue(tsock); } @@ -1386,7 +1400,7 @@ exit: /** * shutdown - shutdown socket connection * @sock: socket structure - * @how: direction to close (always treated as read + write) + * @how: direction to close (unused; always treated as read + write) * * Terminates connection (if necessary), then purges socket's receive queue. * @@ -1469,7 +1483,8 @@ restart: * Returns 0 on success, errno otherwise */ -static int setsockopt(struct socket *sock, int lvl, int opt, char *ov, int ol) +static int setsockopt(struct socket *sock, + int lvl, int opt, char __user *ov, int ol) { struct tipc_sock *tsock = tipc_sk(sock->sk); u32 value; @@ -1525,7 +1540,8 @@ static int setsockopt(struct socket *sock, int lvl, int opt, char *ov, int ol) * Returns 0 on success, errno otherwise */ -static int getsockopt(struct socket *sock, int lvl, int opt, char *ov, int *ol) +static int getsockopt(struct socket *sock, + int lvl, int opt, char __user *ov, int *ol) { struct tipc_sock *tsock = tipc_sk(sock->sk); int len; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index c5f026c7fd38..e19b4bcd67ec 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -266,7 +266,8 @@ static void subscr_subscribe(struct tipc_subscr *s, /* Refuse subscription if global limit exceeded */ if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) { - warn("Failed: max %u subscriptions\n", tipc_max_subscriptions); + warn("Subscription rejected, subscription limit reached (%u)\n", + tipc_max_subscriptions); subscr_terminate(subscriber); return; } @@ -274,8 +275,8 @@ static void subscr_subscribe(struct tipc_subscr *s, /* Allocate subscription object */ sub = kmalloc(sizeof(*sub), GFP_ATOMIC); - if (sub == NULL) { - warn("Memory squeeze; ignoring subscription\n"); + if (!sub) { + warn("Subscription rejected, no memory\n"); subscr_terminate(subscriber); return; } @@ -298,8 +299,7 @@ static void subscr_subscribe(struct tipc_subscr *s, if ((((sub->filter != TIPC_SUB_PORTS) && (sub->filter != TIPC_SUB_SERVICE))) || (sub->seq.lower > sub->seq.upper)) { - warn("Rejecting illegal subscription %u,%u,%u\n", - sub->seq.type, sub->seq.lower, sub->seq.upper); + warn("Subscription rejected, illegal request\n"); kfree(sub); subscr_terminate(subscriber); return; @@ -387,7 +387,7 @@ static void subscr_named_msg_event(void *usr_handle, dbg("subscr_named_msg_event: orig = %x own = %x,\n", orig->node, tipc_own_addr); if (size && (size != sizeof(struct tipc_subscr))) { - warn("Received tipc_subscr of invalid size\n"); + warn("Subscriber rejected, invalid subscription size\n"); return; } @@ -395,7 +395,7 @@ static void subscr_named_msg_event(void *usr_handle, subscriber = kmalloc(sizeof(struct subscriber), GFP_ATOMIC); if (subscriber == NULL) { - warn("Memory squeeze; ignoring subscriber setup\n"); + warn("Subscriber rejected, no memory\n"); return; } memset(subscriber, 0, sizeof(struct subscriber)); @@ -403,7 +403,7 @@ static void subscr_named_msg_event(void *usr_handle, INIT_LIST_HEAD(&subscriber->subscriber_list); subscriber->ref = tipc_ref_acquire(subscriber, &subscriber->lock); if (subscriber->ref == 0) { - warn("Failed to acquire subscriber reference\n"); + warn("Subscriber rejected, reference table exhausted\n"); kfree(subscriber); return; } @@ -422,7 +422,7 @@ static void subscr_named_msg_event(void *usr_handle, NULL, &subscriber->port_ref); if (subscriber->port_ref == 0) { - warn("Memory squeeze; failed to create subscription port\n"); + warn("Subscriber rejected, unable to create port\n"); tipc_ref_discard(subscriber->ref); kfree(subscriber); return; @@ -457,7 +457,7 @@ int tipc_subscr_start(void) int res = -1; memset(&topsrv, 0, sizeof (topsrv)); - topsrv.lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&topsrv.lock); INIT_LIST_HEAD(&topsrv.subscriber_list); spin_lock_bh(&topsrv.lock); diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c index 3f3f933976e9..1e3ae57c7228 100644 --- a/net/tipc/user_reg.c +++ b/net/tipc/user_reg.c @@ -67,7 +67,7 @@ struct tipc_user { static struct tipc_user *users = NULL; static u32 next_free_user = MAX_USERID + 1; -static spinlock_t reg_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(reg_lock); /** * reg_init - create TIPC user registry (but don't activate it) diff --git a/net/tipc/zone.c b/net/tipc/zone.c index 2803e1b4f170..316c4872ff5b 100644 --- a/net/tipc/zone.c +++ b/net/tipc/zone.c @@ -44,19 +44,24 @@ struct _zone *tipc_zone_create(u32 addr) { - struct _zone *z_ptr = NULL; + struct _zone *z_ptr; u32 z_num; - if (!tipc_addr_domain_valid(addr)) + if (!tipc_addr_domain_valid(addr)) { + err("Zone creation failed, invalid domain 0x%x\n", addr); return NULL; + } z_ptr = (struct _zone *)kmalloc(sizeof(*z_ptr), GFP_ATOMIC); - if (z_ptr != NULL) { - memset(z_ptr, 0, sizeof(*z_ptr)); - z_num = tipc_zone(addr); - z_ptr->addr = tipc_addr(z_num, 0, 0); - tipc_net.zones[z_num] = z_ptr; + if (!z_ptr) { + warn("Zone creation failed, insufficient memory\n"); + return NULL; } + + memset(z_ptr, 0, sizeof(*z_ptr)); + z_num = tipc_zone(addr); + z_ptr->addr = tipc_addr(z_num, 0, 0); + tipc_net.zones[z_num] = z_ptr; return z_ptr; } diff --git a/net/tipc/zone.h b/net/tipc/zone.h index 267999c5a240..5ab3d08602e2 100644 --- a/net/tipc/zone.h +++ b/net/tipc/zone.h @@ -2,7 +2,7 @@ * net/tipc/zone.h: Include file for TIPC zone management routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2006, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,7 +45,7 @@ * struct _zone - TIPC zone structure * @addr: network address of zone * @clusters: array of pointers to all clusters within zone - * @links: (used for inter-zone communication) + * @links: number of (unicast) links to zone */ struct _zone { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d901465ce013..f70475bfb62a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -83,7 +83,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/sched.h> @@ -128,6 +127,30 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) +#ifdef CONFIG_SECURITY_NETWORK +static void unix_get_peersec_dgram(struct sk_buff *skb) +{ + int err; + + err = security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb), + UNIXSECLEN(skb)); + if (err) + *(UNIXSECDATA(skb)) = NULL; +} + +static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) +{ + scm->secdata = *UNIXSECDATA(skb); + scm->seclen = *UNIXSECLEN(skb); +} +#else +static inline void unix_get_peersec_dgram(struct sk_buff *skb) +{ } + +static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) +{ } +#endif /* CONFIG_SECURITY_NETWORK */ + /* * SMP locking strategy: * hash table is protected with spinlock unix_table_lock @@ -542,6 +565,14 @@ static struct proto unix_proto = { .obj_size = sizeof(struct unix_sock), }; +/* + * AF_UNIX sockets do not interact with hardware, hence they + * dont trigger interrupts - so it's safe for them to have + * bh-unsafe locking for their sk_receive_queue.lock. Split off + * this special lock-class by reinitializing the spinlock key: + */ +static struct lock_class_key af_unix_sk_receive_queue_lock_key; + static struct sock * unix_create1(struct socket *sock) { struct sock *sk = NULL; @@ -557,6 +588,8 @@ static struct sock * unix_create1(struct socket *sock) atomic_inc(&unix_nr_socks); sock_init_data(sock,sk); + lockdep_set_class(&sk->sk_receive_queue.lock, + &af_unix_sk_receive_queue_lock_key); sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; @@ -1022,7 +1055,7 @@ restart: goto out_unlock; } - unix_state_wlock(sk); + unix_state_wlock_nested(sk); if (sk->sk_state != st) { unix_state_wunlock(sk); @@ -1291,6 +1324,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (siocb->scm->fp) unix_attach_fds(siocb->scm, skb); + unix_get_peersec_dgram(skb); + skb->h.raw = skb->data; err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); if (err) @@ -1570,6 +1605,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, memset(&tmp_scm, 0, sizeof(tmp_scm)); } siocb->scm->creds = *UNIXCREDS(skb); + unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index b1265187b4a8..a690cf773b6a 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -32,7 +32,6 @@ * ******************************************************************************/ -#include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/mm.h> diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index c34833dc7cc1..ad8e8a797790 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -42,7 +42,6 @@ * Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels. *****************************************************************************/ -#include <linux/config.h> #include <linux/stddef.h> /* offsetof(), etc. */ #include <linux/capability.h> #include <linux/errno.h> /* return codes */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index c28ba5a47209..930ea59463ad 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -20,7 +20,6 @@ * Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) *****************************************************************************/ -#include <linux/config.h> #include <linux/init.h> /* __initfunc et al. */ #include <linux/stddef.h> /* offsetof(), etc. */ #include <linux/errno.h> /* return codes */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 282ce4e40d7b..52a2726d327f 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -35,7 +35,6 @@ * response */ -#include <linux/config.h> #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index adfe7b8df355..47b68a301677 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -17,7 +17,6 @@ * 2000-09-04 Henner Eisen Prevent freeing a dangling skb. */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/skbuff.h> diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index dfb80116c59f..a11837d361d2 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -17,7 +17,6 @@ * 2002/10/06 Arnaldo Carvalho de Melo seq_file support */ -#include <linux/config.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 6c5d37517035..2a3fe986b245 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -17,7 +17,6 @@ * X.25 001 Jonathan Naylor Started coding. */ -#include <linux/config.h> #include <linux/if_arp.h> #include <linux/init.h> #include <net/x25.h> diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 6ed3302312fb..04e1aea58bc9 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -9,7 +9,6 @@ * any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/pfkeyv2.h> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b469c8b54613..405b741dff43 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -13,7 +13,6 @@ * */ -#include <linux/config.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/list.h> @@ -46,45 +45,43 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); +static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family); +static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo); int xfrm_register_type(struct xfrm_type *type, unsigned short family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - struct xfrm_type_map *typemap; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); + struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; - write_lock_bh(&typemap->lock); - if (likely(typemap->map[type->proto] == NULL)) - typemap->map[type->proto] = type; + if (likely(typemap[type->proto] == NULL)) + typemap[type->proto] = type; else err = -EEXIST; - write_unlock_bh(&typemap->lock); - xfrm_policy_put_afinfo(afinfo); + xfrm_policy_unlock_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_register_type); int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - struct xfrm_type_map *typemap; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family); + struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; typemap = afinfo->type_map; - write_lock_bh(&typemap->lock); - if (unlikely(typemap->map[type->proto] != type)) + if (unlikely(typemap[type->proto] != type)) err = -ENOENT; else - typemap->map[type->proto] = NULL; - write_unlock_bh(&typemap->lock); - xfrm_policy_put_afinfo(afinfo); + typemap[type->proto] = NULL; + xfrm_policy_unlock_afinfo(afinfo); return err; } EXPORT_SYMBOL(xfrm_unregister_type); @@ -92,7 +89,7 @@ EXPORT_SYMBOL(xfrm_unregister_type); struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) { struct xfrm_policy_afinfo *afinfo; - struct xfrm_type_map *typemap; + struct xfrm_type **typemap; struct xfrm_type *type; int modload_attempted = 0; @@ -102,11 +99,9 @@ retry: return NULL; typemap = afinfo->type_map; - read_lock(&typemap->lock); - type = typemap->map[proto]; + type = typemap[proto]; if (unlikely(type && !try_module_get(type->owner))) type = NULL; - read_unlock(&typemap->lock); if (!type && !modload_attempted) { xfrm_policy_put_afinfo(afinfo); request_module("xfrm-type-%d-%d", @@ -142,6 +137,89 @@ void xfrm_put_type(struct xfrm_type *type) module_put(type->owner); } +int xfrm_register_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_policy_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_policy_lock_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -EEXIST; + modemap = afinfo->mode_map; + if (likely(modemap[mode->encap] == NULL)) { + modemap[mode->encap] = mode; + err = 0; + } + + xfrm_policy_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_register_mode); + +int xfrm_unregister_mode(struct xfrm_mode *mode, int family) +{ + struct xfrm_policy_afinfo *afinfo; + struct xfrm_mode **modemap; + int err; + + if (unlikely(mode->encap >= XFRM_MODE_MAX)) + return -EINVAL; + + afinfo = xfrm_policy_lock_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + + err = -ENOENT; + modemap = afinfo->mode_map; + if (likely(modemap[mode->encap] == mode)) { + modemap[mode->encap] = NULL; + err = 0; + } + + xfrm_policy_unlock_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_mode); + +struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) +{ + struct xfrm_policy_afinfo *afinfo; + struct xfrm_mode *mode; + int modload_attempted = 0; + + if (unlikely(encap >= XFRM_MODE_MAX)) + return NULL; + +retry: + afinfo = xfrm_policy_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + + mode = afinfo->mode_map[encap]; + if (unlikely(mode && !try_module_get(mode->owner))) + mode = NULL; + if (!mode && !modload_attempted) { + xfrm_policy_put_afinfo(afinfo); + request_module("xfrm-mode-%d-%d", family, encap); + modload_attempted = 1; + goto retry; + } + + xfrm_policy_put_afinfo(afinfo); + return mode; +} + +void xfrm_put_mode(struct xfrm_mode *mode) +{ + module_put(mode->owner); +} + static inline unsigned long make_jiffies(long secs) { if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) @@ -1306,17 +1384,31 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) return NULL; read_lock(&xfrm_policy_afinfo_lock); afinfo = xfrm_policy_afinfo[family]; - if (likely(afinfo != NULL)) - read_lock(&afinfo->lock); - read_unlock(&xfrm_policy_afinfo_lock); + if (unlikely(!afinfo)) + read_unlock(&xfrm_policy_afinfo_lock); return afinfo; } static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) { - if (unlikely(afinfo == NULL)) - return; - read_unlock(&afinfo->lock); + read_unlock(&xfrm_policy_afinfo_lock); +} + +static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family) +{ + struct xfrm_policy_afinfo *afinfo; + if (unlikely(family >= NPROTO)) + return NULL; + write_lock_bh(&xfrm_policy_afinfo_lock); + afinfo = xfrm_policy_afinfo[family]; + if (unlikely(!afinfo)) + write_unlock_bh(&xfrm_policy_afinfo_lock); + return afinfo; +} + +static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo) +{ + write_unlock_bh(&xfrm_policy_afinfo_lock); } static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 93a2f36ad3db..43f00fc28a3d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -77,6 +77,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->ealg); kfree(x->calg); kfree(x->encap); + if (x->mode) + xfrm_put_mode(x->mode); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); @@ -1103,17 +1105,14 @@ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family) return NULL; read_lock(&xfrm_state_afinfo_lock); afinfo = xfrm_state_afinfo[family]; - if (likely(afinfo != NULL)) - read_lock(&afinfo->lock); - read_unlock(&xfrm_state_afinfo_lock); + if (unlikely(!afinfo)) + read_unlock(&xfrm_state_afinfo_lock); return afinfo; } static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo) { - if (unlikely(afinfo == NULL)) - return; - read_unlock(&afinfo->lock); + read_unlock(&xfrm_state_afinfo_lock); } /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ @@ -1165,8 +1164,6 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) return res; } -EXPORT_SYMBOL(xfrm_state_mtu); - int xfrm_init_state(struct xfrm_state *x) { struct xfrm_state_afinfo *afinfo; @@ -1196,6 +1193,10 @@ int xfrm_init_state(struct xfrm_state *x) if (err) goto error; + x->mode = xfrm_get_mode(x->props.mode, family); + if (x->mode == NULL) + goto error; + x->km.state = XFRM_STATE_VALID; error: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 81d1005830f4..3e6a722d072e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -427,23 +427,25 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) if (x == NULL) return -ESRCH; + if ((err = security_xfrm_state_delete(x)) != 0) + goto out; + if (xfrm_state_kern(x)) { - xfrm_state_put(x); - return -EPERM; + err = -EPERM; + goto out; } err = xfrm_state_delete(x); - if (err < 0) { - xfrm_state_put(x); - return err; - } + if (err < 0) + goto out; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); - xfrm_state_put(x); +out: + xfrm_state_put(x); return err; } @@ -1055,6 +1057,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr MSG_DONTWAIT); } } else { + if ((err = security_xfrm_policy_delete(xp)) != 0) + goto out; c.data.byid = p->index; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1064,6 +1068,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr xfrm_pol_put(xp); +out: return err; } @@ -1430,7 +1435,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err link = &xfrm_dispatch[type]; /* All operations require privileges, even GET */ - if (security_netlink_recv(skb)) { + if (security_netlink_recv(skb, CAP_NET_ADMIN)) { *errp = -EPERM; return -1; } |